##——— call libraries –

library("genefilter")
library("ggplot2")
library("grid");  library("reshape2")
library("ggrepel")
library("RColorBrewer")
library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
library("BiocParallel")
library("WGCNA")
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## Attaching package: 'WGCNA'
## The following object is masked from 'package:IRanges':
## 
##     cor
## The following object is masked from 'package:S4Vectors':
## 
##     cor
## The following object is masked from 'package:stats':
## 
##     cor
library("Rtsne")
library("pheatmap")
library("tools")
library("viridis")
## Loading required package: viridisLite
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library("GSVA")
library("GSEABase")
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: XML
## 
## Attaching package: 'XML'
## The following object is masked from 'package:tools':
## 
##     toHTML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
library("stringr")
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:GSEABase':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("rstatix")
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:genefilter':
## 
##     Anova
## The following object is masked from 'package:stats':
## 
##     filter
library("tidyr")
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
## The following object is masked from 'package:reshape2':
## 
##     smiths
source('D:/Pembro-Fluvac/Analysis/Ranalysis/PembroFluSpec_PlottingFunctions.R')
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:rstatix':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:genefilter':
## 
##     area
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18362)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
##  [1] tools     parallel  stats4    grid      stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
##  [1] e1071_1.7-3                 MASS_7.3-51.6              
##  [3] gplots_3.0.4                tidyr_1.1.1                
##  [5] rstatix_0.6.0               dplyr_1.0.2                
##  [7] stringr_1.4.0               GSEABase_1.50.1            
##  [9] graph_1.66.0                annotate_1.66.0            
## [11] XML_3.99-0.5                AnnotationDbi_1.50.3       
## [13] GSVA_1.36.2                 gridExtra_2.3              
## [15] scales_1.1.1                viridis_0.5.1              
## [17] viridisLite_0.3.0           pheatmap_1.0.12            
## [19] Rtsne_0.15                  WGCNA_1.69                 
## [21] fastcluster_1.1.25          dynamicTreeCut_1.63-1      
## [23] BiocParallel_1.22.0         DESeq2_1.28.1              
## [25] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
## [27] matrixStats_0.56.0          Biobase_2.48.0             
## [29] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [31] IRanges_2.22.2              S4Vectors_0.26.1           
## [33] BiocGenerics_0.34.0         RColorBrewer_1.1-2         
## [35] ggrepel_0.8.2               reshape2_1.4.4             
## [37] ggplot2_3.3.2               genefilter_1.70.0          
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.3.1           backports_1.1.9        Hmisc_4.4-1           
##  [4] plyr_1.8.6             splines_4.0.2          digest_0.6.25         
##  [7] foreach_1.5.0          htmltools_0.5.0        GO.db_3.11.4          
## [10] gdata_2.18.0           magrittr_1.5           checkmate_2.0.0       
## [13] memoise_1.1.0          cluster_2.1.0          doParallel_1.0.15     
## [16] openxlsx_4.1.5         jpeg_0.1-8.1           colorspace_1.4-1      
## [19] blob_1.2.1             haven_2.3.1            xfun_0.15             
## [22] crayon_1.3.4           RCurl_1.98-1.2         impute_1.62.0         
## [25] survival_3.2-3         iterators_1.0.12       glue_1.4.1            
## [28] gtable_0.3.0           zlibbioc_1.34.0        XVector_0.28.0        
## [31] car_3.0-9              abind_1.4-5            DBI_1.1.0             
## [34] Rcpp_1.0.5             xtable_1.8-4           htmlTable_2.0.1       
## [37] foreign_0.8-80         bit_4.0.4              preprocessCore_1.50.0 
## [40] Formula_1.2-3          htmlwidgets_1.5.3      ellipsis_0.3.1        
## [43] pkgconfig_2.0.3        nnet_7.3-14            locfit_1.5-9.4        
## [46] tidyselect_1.1.0       rlang_0.4.7            later_1.1.0.1         
## [49] munsell_0.5.0          cellranger_1.1.0       generics_0.0.2        
## [52] RSQLite_2.2.0          broom_0.7.0            evaluate_0.14         
## [55] fastmap_1.0.1          yaml_2.2.1             knitr_1.29            
## [58] bit64_4.0.2            zip_2.1.0              caTools_1.18.0        
## [61] purrr_0.3.4            mime_0.9               compiler_4.0.2        
## [64] shinythemes_1.1.2      rstudioapi_0.11        curl_4.3              
## [67] png_0.1-7              tibble_3.0.3           geneplotter_1.66.0    
## [70] stringi_1.4.6          highr_0.8              forcats_0.5.0         
## [73] lattice_0.20-41        Matrix_1.2-18          vctrs_0.3.2           
## [76] pillar_1.4.6           lifecycle_0.2.0        data.table_1.13.0     
## [79] bitops_1.0-6           httpuv_1.5.4           R6_2.4.1              
## [82] latticeExtra_0.6-29    promises_1.1.1         KernSmooth_2.23-17    
## [85] rio_0.5.16             codetools_0.2-16       gtools_3.8.2          
## [88] withr_2.2.0            GenomeInfoDbData_1.2.3 hms_0.5.3             
## [91] rpart_4.1-15           class_7.3-17           rmarkdown_2.7         
## [94] carData_3.0-4          shiny_1.5.0            base64enc_0.1-3

———– merge & qc data from spreadsheets ——————–

setwd("D:/Pembro-Fluvac/Analysis")
mergedData <- Tflow <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Tflow_freqParent_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
print("Data by year and cohort for T cell flow cytometry")
## [1] "Data by year and cohort for T cell flow cytometry"
table(mergedData$Cohort, mergedData$TimeCategory, mergedData$Year)
## , ,  = 1
## 
##          
##           baseline late oneWeek
##   aPD1           4    1       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2
## 
##          
##           baseline late oneWeek
##   aPD1           3    2       3
##   Healthy       12   11      12
##   nonPD1         1    1       1
## 
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   11      16
##   Healthy       15    3      15
##   nonPD1         1    0       1
fit <- lm(data=mergedData[ , - which( colnames(mergedData) == "Label" | colnames(mergedData) == "Subject" | colnames(mergedData) == "Cohort" | 
                                        colnames(mergedData) == "TimeCategory" | colnames(mergedData) == "TimePoint")], 
          formula = TflowBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(mergedData) [ grep( paste(a, collapse="|"), colnames(mergedData)) ] <- paste0( colnames(mergedData)[ grep( paste(a, collapse="|"), colnames(mergedData)) ], ".batchEffect" ) }

serology <- read.csv(file= "D:/Pembro-Fluvac/Analysis/mergedData/SerologyMeasurements.csv", stringsAsFactors = F, header = T)
print("Data by year and cohort for serology")
## [1] "Data by year and cohort for serology"
table(serology$Cohort, serology$TimeCategory, serology$Year)
## , ,  = 1
## 
##          
##           baseline late oneWeek
##   aPD1           4    4       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2
## 
##          
##           baseline late oneWeek
##   aPD1          12    9       3
##   Healthy       12   12      12
##   nonPD1         3    3       1
## 
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15   15      15
##   nonPD1         1    1       1
temp1 <- merge(x = mergedData, y=serology, all = T, suffixes = c(".Tflow",".Serology"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))

serologyKd <- read.csv(file="D:/Pembro-Fluvac/Analysis/mergedData/Serology_Kd.csv", header=T)
print("Data by year and cohort for 1/Kd measurements")
## [1] "Data by year and cohort for 1/Kd measurements"
table(serologyKd$Cohort, serologyKd$TimeCategory, serologyKd$Year)
## , ,  = 1
## 
##          
##           baseline late oneWeek
##   aPD1           4    4       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2
## 
##          
##           baseline late oneWeek
##   aPD1          12    8       3
##   Healthy       12   12      12
##   nonPD1         3    3       1
## 
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15   15      15
##   nonPD1         1    1       1
temp1 <- merge(x = temp1, y=serologyKd, all = T, suffixes = c(".Tflow",".SerologyKd"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year','Visit'))


Bflow <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_freqParent_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
print("Data by year and cohort for B cell flow cytometry")
## [1] "Data by year and cohort for B cell flow cytometry"
table(Bflow$Cohort, Bflow$TimeCategory, Bflow$Year)
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15    7      14
##   nonPD1         1    0       1
BflowBATCH <- Bflow[, -c( grep( colnames(Bflow), pattern=paste0(c("Naive","Lymphocytes", "CD4","CD19hi_..FreqParent","CD19hi_CD11c","CD19hi_CD16","CD19hi_CD23","CD1hi9_CD25",
                                                             "CD19hi_CD32","CD19hi_CD38lo...FreqParent","CD19hi_CD80","CD19hi_CD86","CD19hi_IgM","CD19hi_CD138",
                                                             "CD19hi_CXCR4","CD19hi_CD71","CD19hi_Ki67..CXCR4","CD19hi_Ki67..Ki67","CD38lo...FreqParent",
                                                             "CD19hi_Tbet"), collapse ="|")) )]    # cut out columns to test for batch fx, else variables > observations
fit <- lm(data=BflowBATCH[ , - which( colnames(BflowBATCH) == "Label" | colnames(BflowBATCH) == "Subject" | colnames(BflowBATCH) == "Cohort" | 
                                        colnames(BflowBATCH) == "TimeCategory" | colnames(BflowBATCH) == "TimePoint")], 
          formula = BflowBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Bflow) [ grep( paste(a, collapse="|"), colnames(Bflow)) ] <- paste0( colnames(Bflow)[ grep( paste(a, collapse="|"), colnames(Bflow)) ], ".batchEffect" )}
temp2 <- merge(x = temp1, y=Bflow, all = T, suffixes = c(".Tflow",".Bflow"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


Tmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Tflow_medianFI_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
Tmfi <- Tmfi[, -c(grep( colnames(Tmfi), pattern=paste0(c("Dead","medfi.CD20","medfi.CD4","Freq","ICOSlo"), collapse ="|")))]  # cut out columns to test for batch fx, else variables > observations
fit <- lm(data=Tmfi[ , - which( colnames(Tmfi) == "Label" | colnames(Tmfi) == "Subject" | colnames(Tmfi) == "Cohort" | 
                                   colnames(Tmfi) == "TimeCategory" | colnames(Tmfi) == "TimePoint")], 
          formula = TmfiBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Tmfi) [ grep( paste(a, collapse="|"), colnames(Tmfi)) ] <- paste0( colnames(Tmfi)[ grep( paste(a, collapse="|"), colnames(Tmfi)) ], ".batchEffect" )}

temp3 <- merge(x = temp2, y=Tmfi, all = T, suffixes = c(".one",".Tmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))

demog <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/clinicalMetadata.csv", stringsAsFactors = F, header=T); 
demog$TimePoint <- demog$Visit <- NULL
demog$DateFirstIRAE <- strptime(demog$DateFirstIRAE, format = "%Y%m%d")
demog$DateFirstFlowFile <- strptime(demog$DateFirstFlowFile, format = "%Y%m%d")
temp4 <- merge(x = temp3, y=demog, all=T, suffixes = c(".two",".demog"), by = c('Subject', 'TimeCategory', 'Year','Cohort'))


Bmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_Bmfi_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
fit <- lm(data=Bmfi[ , - which( colnames(Bmfi) == "Label" | colnames(Bmfi) == "Subject" | colnames(Bmfi) == "Cohort" | 
                                    colnames(Bmfi) == "TimeCategory" | colnames(Bmfi) == "TimePoint")], 
          formula = BmfiBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Bmfi) [ grep( paste(a, collapse="|"), colnames(Bmfi)) ] <- paste0( colnames(Bmfi)[ grep( paste(a, collapse="|"), colnames(Bmfi)) ], ".batchEffect" )}
temp5 <- merge(x = temp4, y=Bmfi, all = T, suffixes = c(".one",".TBpmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


TBpmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_TmfiFromB_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
fit <- lm(data=TBpmfi[ , - which( colnames(TBpmfi) == "Label" | colnames(TBpmfi) == "Subject" | colnames(TBpmfi) == "Cohort" | 
                                  colnames(TBpmfi) == "TimeCategory" | colnames(TBpmfi) == "TimePoint")], 
          formula = TfromBBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(TBpmfi) [ grep( paste(a, collapse="|"), colnames(TBpmfi)) ] <- paste0( colnames(TBpmfi)[ grep( paste(a, collapse="|"), colnames(TBpmfi)) ], ".batchEffect" )}
temp6 <- merge(x = temp5, y=TBpmfi, all = T, suffixes = c(".one",".TBpmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


mergedData <- temp6

index <- demog[which(demog$Current.Immunotherapy == 'Ipi/Nivo'),"Subject"]
mergedData$Cohort[which(mergedData$Subject %in% index)] <- "nonPD1"      # exclude combo therapy


rm(Bflow); rm(temp1); rm(temp2); rm(temp3); rm(temp4); rm(temp5); rm(temp6)

———– calculate fold-changes for HAI, Tfh, and PB ——————–

mergedData$logHAI <- log(mergedData$H1N1pdm09.HAI.titer)
subsetData <- subset(mergedData, TimeCategory != "oneWeek" & Cohort != "nonPD1" )
FC_response <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("logHAI"))
FC_response$FChai_late <- FC_response$`late`/FC_response$`baseline`

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" )
FC_response2 <- dcast( subset(subsetData, cTfh_ICOShiCD38hi_..FreqParent > 0), `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")) 
FC_response2$FCtfh_oW <- FC_response2$`oneWeek`/FC_response2$`baseline`; FC_response2$Cohort <- NULL
FC_response <- merge(x=FC_response, y=FC_response2, by = "Subject")
FC_response2 <- dcast( subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")) 
FC_response2$FCPB_oW <- FC_response2$`oneWeek`/FC_response2$`baseline`; FC_response2$Cohort <- NULL
FC_response <- merge(x=FC_response, y=FC_response2, by = "Subject")
FC_response2 <- dcast( subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("Plasma.CXCL13..pg.mL.")) 
FC_response2$FCCXCL13_oW <- FC_response2$`oneWeek`/FC_response2$`baseline`; FC_response2$Cohort <- NULL
FC_response <- merge(x=FC_response, y=FC_response2, by = "Subject")
## Warning in merge.data.frame(x = FC_response, y = FC_response2, by = "Subject"):
## column names 'baseline.x', 'baseline.y' are duplicated in the result
FC_response2 <- dcast( subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgG1_Total.sialylated")) 
FC_response2$IgG1sial_oW <- FC_response2$`oneWeek`/FC_response2$`baseline`; FC_response2$Cohort <- NULL
FC_response <- merge(x=FC_response, y=FC_response2, by = "Subject")
## Warning in merge.data.frame(x = FC_response, y = FC_response2, by = "Subject"):
## column names 'baseline.x', 'baseline.y', 'oneWeek.x', 'oneWeek.y' are duplicated
## in the result
FC_response <- FC_response[, c("Subject","Cohort","FChai_late","FCtfh_oW", "FCPB_oW", "FCCXCL13_oW", "IgG1sial_oW")]
mergedData <- merge(x = mergedData, y= FC_response, all=T, by = c('Subject', 'Cohort'))

mergedData$ASC_ABCratio <- mergedData$IgDlo_CD71hi_CD20loCD71hi...FreqParent / mergedData$IgDlo_CD71hi_ActBCells...FreqParent

# write.csv(mergedData, file = "D:/Pembro-Fluvac/Analysis/mergedData/allMergedData.csv")


# mergedData <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/allMergedData.csv", stringsAsFactors = F, header = T, row.names = 1)

mergedData$TimeCategory <- factor(mergedData$TimeCategory, levels = c("baseline", "oneWeek","late"))
mergedData$Cohort <- factor(mergedData$Cohort, levels = c("Healthy", "aPD1","nonPD1"))
mergedData$dummy <- "dummy"

———– Dataset overview ——————–

dataAvail <- mergedData[ , - grep(paste(c("TimePoint", "dummy", "Visit","Label"), collapse = "|"), colnames(mergedData))]
temp <- reshape(dataAvail, idvar = c('Subject', 'Cohort','Year'), timevar = c('TimeCategory'),direction = 'wide')
saveCohort <- temp[which(temp$Cohort != "nonPD1"),"Cohort"]
saveYear <- temp[which(temp$Cohort != "nonPD1"),"Year"]
simplifiedDataAvail <- temp[which(temp$Cohort != "nonPD1"), c('Subject',  
                                                              'cTfh_ICOShiCD38hi_CCR6hi...FreqParent.baseline' ,          # Tflow_freq
                                                              'Plasma.CXCL13..pg.mL..baseline' ,                          # Serology
                                                              'IgDlo_CD71hi_ActBCells.Tbet....FreqParent.baseline' ,      # Bflow_freq
                                                              'CD19hi_CD27hiCD38hi_CD20lo.PB..._medfi.Foxp3..baseline' ,  # Tflow_medFI
                                                              'Sex.baseline' ,                                            # Demographics
                                                              'CD20loCD71hi...medfi.Blimp1..baseline')]                   # Bflow_medFI
                
rownames(simplifiedDataAvail) <- simplifiedDataAvail$Subject; simplifiedDataAvail$Subject <- NULL;
simplifiedDataAvail <- as.data.frame(!is.na(simplifiedDataAvail));  simplifiedDataAvail <- simplifiedDataAvail * 2 
simplifiedDataAvail <- cbind(simplifiedDataAvail, Cohort = (as.numeric(saveCohort)-1)*2, Year = as.numeric(saveYear)-1)
names(simplifiedDataAvail) <- c("Tflow_freq","Serology","Bflow_freq", "Tflow_medFI","Demographics","Bflow_medFI","Cohort", "Year")
pheatmap(as.data.frame(t(simplifiedDataAvail)), cluster_col = F,color=gray.colors(100, start=1,end=0), fontsize_row = 18, fontsize_col = 6, main = "Data available by subject"
         # , filename = "D:/Pembro-Fluvac/Analysis/Images/dataAvailable.pdf"
         )

dev.off()
## null device 
##           1
median(demog$Age[which(demog$Cohort == "aPD1" & demog$Current.Immunotherapy != "Ipi/Nivo")])
## [1] 61.5
sd(demog$Age[which(demog$Cohort == "aPD1" & demog$Current.Immunotherapy != "Ipi/Nivo")])
## [1] 11.2867
table(demog$Current.Immunotherapy[which(demog$Current.Immunotherapy != "Ipi/Nivo")])
## 
##                   Nivolumab Pembrolizumab 
##            27             8            22
x <- demog[which(demog$Cohort == "aPD1"),] 
table(x$Sex)
## 
## Female   Male 
##     11     21
x <- demog[which(demog$Cohort == "Healthy"),] 
table(x$Sex)
## 
## Female   Male 
##     12     15

———– Cohort description ——————–

table(mergedData$Cohort, mergedData$Sex, mergedData$Year)  
## , ,  = 1
## 
##          
##           Female Male
##   Healthy      0    0
##   aPD1         0    3
##   nonPD1       1    0
## 
## , ,  = 2
## 
##          
##           Female Male
##   Healthy      6    6
##   aPD1         5    6
##   nonPD1       1    0
## 
## , ,  = 3
## 
##          
##           Female Male
##   Healthy      6    9
##   aPD1         4   12
##   nonPD1       0    0
table(serology$Cohort, serology$TimeCategory, serology$Year, !is.na(serology$H1N1pdm09.HAI.titer))
## , ,  = 1,  = FALSE
## 
##          
##           baseline late oneWeek
##   aPD1           0    0       0
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2,  = FALSE
## 
##          
##           baseline late oneWeek
##   aPD1           0    1       1
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 3,  = FALSE
## 
##          
##           baseline late oneWeek
##   aPD1           0    0       0
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 1,  = TRUE
## 
##          
##           baseline late oneWeek
##   aPD1           4    4       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2,  = TRUE
## 
##          
##           baseline late oneWeek
##   aPD1          12    8       2
##   Healthy       12   12      12
##   nonPD1         3    3       1
## 
## , ,  = 3,  = TRUE
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15   15      15
##   nonPD1         1    1       1
subsetData <- subset(demog, Current.Immunotherapy %in% c("Pembrolizumab","Nivolumab"))
table(subsetData$Current.Immunotherapy)
## 
##     Nivolumab Pembrolizumab 
##             8            22
table(subsetData$Cohort, subsetData$Sex)
##       
##        Female Male
##   aPD1      9   21
table(demog$Cohort, demog$Sex)
##          
##           Female Male
##   aPD1        11   21
##   Healthy     12   15
ggplot( data = subset(mergedData, Cohort == "aPD1" & TimeCategory == "baseline"), aes(x=Cycle.of.Immunotherapy)) + geom_histogram(color="white",fill="#FFB18C") + 
  ggtitle("Cohort 2 - aPD1 cohort") +  theme_bw() + 
  theme(axis.text=element_text(color="black",size=18), axis.title=element_text(color="black",size=24), plot.title = element_text(size=24)) + scale_x_continuous(breaks=seq(0,30,5)) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CyclesImmunotherapy.pdf", device = 'pdf', height=3, width=8)

median(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], na.rm = T)  
## [1] 7
sd(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], na.rm = T)  
## [1] 6.863131
paste("Median Age aPD1: ", median(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)  )
## [1] "Median Age aPD1:  61.5"
paste("Median Age Healthy: ", median(mergedData[which(mergedData$Cohort == "Healthy" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)   )
## [1] "Median Age Healthy:  33"
paste("Range Age aPD1: ", range(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)  )
## [1] "Range Age aPD1:  37" "Range Age aPD1:  81"
paste("Range Age Healthy: ", range(mergedData[which(mergedData$Cohort == "Healthy" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)   )
## [1] "Range Age Healthy:  23" "Range Age Healthy:  47"

———– Global overview of phenotypic differences ——————–

temp <- as.data.frame( cor(mergedData[ ,sapply(mergedData, is.numeric)], mergedData$Cycle.of.Immunotherapy, use="pairwise.complete.obs"))
## Warning in cor(mergedData[, sapply(mergedData, is.numeric)],
## mergedData$Cycle.of.Immunotherapy, : Missing values generated in calculation of
## cor. Likely cause: too many missing entries or zero variance.
temp$names <- rownames(temp); rownames(temp) <- NULL; 
temp[order(temp$V1,decreasing = T),][1:20,];   temp[order(temp$V1,decreasing = F),][1:20,]; 
##            V1                                         names
## 290 1.0000000                        Cycle.of.Immunotherapy
## 297 0.7252904         CD19_CD38lo.CD21loCD27hi...medfi.IgM.
## 66  0.6671501        cTfh_ICOShiCD38hi_CXCR4lo...FreqParent
## 39  0.6344893        cTfh_ICOSloCD38lo_CD127lo...FreqParent
## 164 0.6040705             CD19hi_NaiveB.CD23hi...FreqParent
## 50  0.5862303        CD19_CD27.CD38..CD20lo.PB...FreqParent
## 63  0.5848764           Live_CD3..CD4..CXCR4lo...FreqParent
## 197 0.5807487    IgDlo_CD71hi_ActBCells.CD23hi...FreqParent
## 345 0.5512736                     CD19_NaiveB...medfi.CD86.
## 244 0.5479177         cTfh_ICOShiCD38hi_cTfh..._medfi.CD36.
## 137 0.5470961  CD19hi_CD38lo.CD21loCD27hi.IgM....FreqParent
## 70  0.5380424        cTfh_ICOSloCD38lo_CXCR4lo...FreqParent
## 178 0.5286854              CD27hiCD38hi_CD11c....FreqParent
## 13  0.5264996           Live_CD3..CD4..CD127lo...FreqParent
## 372 0.5259865      CD19_NonnaiveB.CD27.CD38....medfi.CD11c.
## 212 0.5251972 IgDlo_CD71hi_CD20loCD71hi.CD23hi...FreqParent
## 458 0.5131468                  Ki67hiCD38hicTfh_medfi.CD25.
## 390 0.5095673                      IgDloCD71hi..medfi.CD25.
## 321 0.5041596                      CD19_Ki67....medfi.CD25.
## 295 0.4952655         CD19_CD38lo.CD21loCD27hi...medfi.IgD.
##             V1                                        names
## 235 -0.5940749        cTfh_ICOShiCD38hi_cTfh..._medfi.Tcf1.
## 279 -0.5478600 CD19hi_CD27hiCD38hi_CD20lo.PB..._medfi.CD25.
## 476 -0.5456248                                  FCCXCL13_oW
## 258 -0.5263091              CD19hi_CD27hiCD38hi_medfi.CD25.
## 285 -0.5064010 CD19hi_CD27hiCD38hi_CD20lo.PB..._medfi.CCR6.
## 78  -0.5032836                       Original.RU.identifier
## 368 -0.5023851      CD19_NonnaiveB.CD27.CD38....medfi.CD86.
## 264 -0.4866775              CD19hi_CD27hiCD38hi_medfi.CCR6.
## 466 -0.4853802                Ki67hiCD38hicTfh_medfi.CXCR5.
## 314 -0.4787725       CD19_CD38lo.CD21loCD27hi...medfi.Ki67.
## 346 -0.4762914                   CD19_NaiveB...medfi.CD138.
## 429 -0.4710903                      ActBCells...medfi.Ki67.
## 179 -0.4412415              CD27hiCD38hi_CD16....FreqParent
## 354 -0.4400329                    CD19_NaiveB...medfi.CD14.
## 143 -0.4381228                   CD19hi_CD138....FreqParent
## 168 -0.4360003             CD19hi_NaiveB.CD71....FreqParent
## 370 -0.4357687      CD19_NonnaiveB.CD27.CD38....medfi.CD38.
## 277 -0.4339355 CD19hi_CD27hiCD38hi_CD20lo.PB..._medfi.Tcf1.
## 146 -0.4298025                    CD19hi_Ki67....FreqParent
## 177 -0.4279286                    CD27hiCD38hi_..FreqParent
# 
# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline");  
# res <- lapply(subsetData[,c(5:80,82:161,163:292,300:450)], function(x) t.test(x ~ subsetData$Cohort, var.equal = TRUE))
# res <- sapply(res, "[[", "p.value")



subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_NaiveB...FreqParent"))
prePostTimeAveraged(melted, title = "Naive B frequencies averaged", xLabel = NULL, yLabel = "NaiveB (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="CD19hi_NaiveB...FreqParent", fillParam="Cohort", title="NaiveB in CD19 at baseline", 
             yLabel="IgD+CD27- (% CD19)", position="left")  
## Warning: Removed 26 rows containing missing values (geom_point).

univScatter(data = subset(subsetData, TimeCategory == "baseline"), yData = "CD19hi_NaiveB...FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs NaiveB at baseline", yLabel= "IgD+CD27- (% CD19)", xLabel = "Cycle of immunotherapy") #+ scale_y_continuous(limits= c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 41 rows containing non-finite values (stat_smooth).
## Warning: Removed 41 rows containing missing values (geom_point).

summary(lm( data = mergedData, CD19hi_NaiveB...FreqParent ~ Cohort + Year + TimeCategory ))
## 
## Call:
## lm(formula = CD19hi_NaiveB...FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.041  -7.960   0.489  12.084  34.620 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           38.180      3.687  10.354 2.64e-16 ***
## CohortaPD1            14.065      3.900   3.607 0.000545 ***
## CohortnonPD1          -9.451     12.624  -0.749 0.456334    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek    4.930      4.369   1.128 0.262604    
## TimeCategorylate      -4.824      4.984  -0.968 0.336061    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.33 on 78 degrees of freedom
##   (90 observations deleted due to missingness)
## Multiple R-squared:  0.1829, Adjusted R-squared:  0.141 
## F-statistic: 4.365 on 4 and 78 DF,  p-value: 0.003083
summary(lm( data = subset(subsetData, TimeCategory == "baseline"), CD19hi_NaiveB...FreqParent ~ Cohort + Year + Age))
## 
## Call:
## lm(formula = CD19hi_NaiveB...FreqParent ~ Cohort + Year + Age, 
##     data = subset(subsetData, TimeCategory == "baseline"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.008  -9.965  -2.014   9.204  28.180 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  63.8581    14.0785   4.536 9.85e-05 ***
## CohortaPD1   32.0922    14.3105   2.243   0.0330 *  
## Year              NA         NA      NA       NA    
## Age          -0.7064     0.4150  -1.702   0.0998 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.18 on 28 degrees of freedom
##   (26 observations deleted due to missingness)
## Multiple R-squared:  0.1662, Adjusted R-squared:  0.1067 
## F-statistic: 2.791 on 2 and 28 DF,  p-value: 0.07848
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_Ki67....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+Ki67+ frequencies averaged", xLabel = NULL, yLabel = "CD19+Ki67+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_Ki67....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_Ki67....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6583 -0.9559 -0.3589  0.5265  8.7111 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.4238     0.3259   4.369  3.8e-05 ***
## CohortaPD1            0.2651     0.3446   0.769    0.444    
## CohortnonPD1         -0.1634     1.1157  -0.146    0.884    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek  -0.1708     0.3861  -0.442    0.659    
## TimeCategorylate      0.3994     0.4404   0.907    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.532 on 78 degrees of freedom
##   (90 observations deleted due to missingness)
## Multiple R-squared:  0.0328, Adjusted R-squared:  -0.0168 
## F-statistic: 0.6614 on 4 and 78 DF,  p-value: 0.6207
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD71....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD71+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD71+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD71....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_CD71....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0318 -1.1309 -0.4282  0.8963  9.0763 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.46818    0.41401   5.962 6.86e-08 ***
## CohortaPD1          -0.44450    0.43785  -1.015   0.3132    
## CohortnonPD1        -0.06632    1.41735  -0.047   0.9628    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek  0.25628    0.49053   0.522   0.6028    
## TimeCategorylate     1.01110    0.55956   1.807   0.0746 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.946 on 78 degrees of freedom
##   (90 observations deleted due to missingness)
## Multiple R-squared:  0.04905,    Adjusted R-squared:  0.0002855 
## F-statistic: 1.006 on 4 and 78 DF,  p-value: 0.4097
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD80....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD80+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD80+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="CD19hi_CD80....FreqParent", fillParam="Cohort", title="CD80 in CD19 at baseline", 
             yLabel="CD80+ (% CD19)", position="right")  
## Warning: Removed 26 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CD19CD80_atBaseline_barplot.pdf")
univScatter(data = subset(subsetData, TimeCategory == "baseline"), yData = "CD19hi_CD80....FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs CD19+CD80+ at baseline", yLabel= "CD80+ (% CD19)", xLabel = "Cycle of immunotherapy") + scale_y_continuous(limits= c(0,8))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 41 rows containing non-finite values (stat_smooth).

## Warning: Removed 41 rows containing missing values (geom_point).

summary(lm( data = subsetData, CD19hi_CD80....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_CD80....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = subsetData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7273 -1.0273 -0.3969  0.7022  3.9618 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.35727    0.33671   9.971 1.64e-15 ***
## CohortaPD1          -0.76033    0.35419  -2.147    0.035 *  
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek  0.12091    0.40326   0.300    0.765    
## TimeCategorylate    -0.01205    0.45402  -0.027    0.979    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.574 on 77 degrees of freedom
##   (75 observations deleted due to missingness)
## Multiple R-squared:  0.05924,    Adjusted R-squared:  0.02259 
## F-statistic: 1.616 on 3 and 77 DF,  p-value: 0.1924
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD86....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD86+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD86+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="CD19hi_CD86....FreqParent", fillParam="Cohort", title="CD86 in CD19 at baseline", 
             yLabel="CD86+ (% CD19)", position="left") 
## Warning: Removed 26 rows containing missing values (geom_point).

summary(lm( data = subsetData, CD19hi_CD86....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_CD86....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = subsetData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.166 -3.226 -1.253  1.200 38.684 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           2.5390     1.3006   1.952   0.0546 .
## CohortaPD1            3.1414     1.3681   2.296   0.0244 *
## Year                      NA         NA      NA       NA  
## TimeCategoryoneWeek   1.1356     1.5577   0.729   0.4682  
## TimeCategorylate      0.1741     1.7538   0.099   0.9212  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.081 on 77 degrees of freedom
##   (75 observations deleted due to missingness)
## Multiple R-squared:  0.07067,    Adjusted R-squared:  0.03447 
## F-statistic: 1.952 on 3 and 77 DF,  p-value: 0.1283
# summary(lm( data = subset(subsetData, TimeCategory == "baseline"), logCD19CD86_FreqParent ~ Cohort ))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_IgM....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+IgM+ frequencies averaged", xLabel = NULL, yLabel = "CD19+IgM+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD138....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD138+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD138+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_Tbet....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+Tbet+ frequencies averaged", xLabel = NULL, yLabel = "CD19+Tbet+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: PB at d0", yLabel="CD19+IgD-CD27++CD38++ frequency at d0")
## Warning: Removed 26 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline" & IgDlo_CD71hi_ActBCells...FreqParent > 0 & IgDlo_CD71hi_CD20loCD71hi...FreqParent >0)
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="baseline ABC", 
             yLabel="CD20+ ABC (% CD71+)") + scale_y_continuous(limits=c(0,100), breaks=seq(0,100,10))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ABC_atBaseline_barPlot.pdf", width=4)
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="baseline ASC", 
             yLabel="CD20- ASC (% CD71+)")+ scale_y_continuous(limits=c(0,100), breaks=seq(0,100,10))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ASC_atBaseline_barPlot.pdf", width=4)

twoSampleBar(data=subsetData, xData="Cohort", yData="ASC_ABCratio", fillParam="Cohort", title="baseline ASC/ABC ratio", 
             yLabel="ASC/ABC ratio") + scale_y_continuous(limits=c(0,8), breaks=seq(0,100,1))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ASC_ABCratio_atBaseline_barPlot.pdf", width=4)



subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..ICOShi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+ICOS+ frequencies averaged", xLabel = NULL, yLabel = "CD4+ICOS+ (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..ICOShi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..ICOShi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.2666  -1.5000   0.0349   1.1448   8.9334 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.651752   1.207689   3.024  0.00303 ** 
## CohortaPD1           7.085104   0.543181  13.044  < 2e-16 ***
## CohortnonPD1         2.454473   1.208398   2.031  0.04434 *  
## Year                 0.902221   0.443262   2.035  0.04391 *  
## TimeCategoryoneWeek -0.274724   0.589921  -0.466  0.64224    
## TimeCategorylate     0.009502   0.696301   0.014  0.98913    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.992 on 126 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.5976, Adjusted R-squared:  0.5817 
## F-statistic: 37.43 on 5 and 126 DF,  p-value: < 2.2e-16
subsetData <- subset(mergedData, Cohort != "nonPD1" & Year == 3 )  # given strong influence of Year in the linear model 
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..CTLA4hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+CTLA4+ frequencies averaged", xLabel = NULL, yLabel = "CD4+CTLA4+ (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..CTLA4hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..CTLA4hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3717 -2.5060 -0.4985  1.7944 12.7274 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -7.6043     1.6029  -4.744 5.58e-06 ***
## CohortaPD1            4.5291     0.7209   6.282 4.97e-09 ***
## CohortnonPD1          2.0930     1.6038   1.305    0.194    
## Year                  5.3590     0.5883   9.109 1.60e-15 ***
## TimeCategoryoneWeek   1.1507     0.7830   1.470    0.144    
## TimeCategorylate      0.6813     0.9242   0.737    0.462    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.972 on 126 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.5391, Adjusted R-squared:  0.5208 
## F-statistic: 29.48 on 5 and 126 DF,  p-value: < 2.2e-16
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..CD38hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+CD38hi frequencies averaged", xLabel = NULL, yLabel = "CD4+CD38hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..CD38hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..CD38hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.0595 -1.9124 -0.7098  1.3736 10.4820 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           8.8556     1.4138   6.264 5.44e-09 ***
## CohortaPD1           -3.7739     0.6359  -5.935 2.66e-08 ***
## CohortnonPD1         -5.0837     1.4146  -3.594 0.000466 ***
## Year                  0.4541     0.5189   0.875 0.383139    
## TimeCategoryoneWeek  -0.4285     0.6906  -0.620 0.536051    
## TimeCategorylate     -0.8058     0.8151  -0.989 0.324791    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.503 on 126 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.2577, Adjusted R-squared:  0.2283 
## F-statistic:  8.75 on 5 and 126 DF,  p-value: 3.796e-07
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..Foxp3hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+Foxp3hi frequencies averaged", xLabel = NULL, yLabel = "CD4+Foxp3hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.779 -1.792  0.001  1.553  6.783 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          6.10294    0.92934   6.567 1.21e-09 ***
## CohortaPD1           1.44172    0.41799   3.449 0.000765 ***
## CohortnonPD1        -0.08409    0.92989  -0.090 0.928090    
## Year                -0.61461    0.34110  -1.802 0.073962 .  
## TimeCategoryoneWeek -0.53686    0.45396  -1.183 0.239185    
## TimeCategorylate     0.94317    0.53582   1.760 0.080793 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.303 on 126 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.157,  Adjusted R-squared:  0.1236 
## F-statistic: 4.694 on 5 and 126 DF,  p-value: 0.0005785
summary(lm( data = mergedData, Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory + Live_CD3..CD4..CTLA4hi...FreqParent + Live_CD3..CD4..ICOShi...FreqParent))
## 
## Call:
## lm(formula = Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + 
##     TimeCategory + Live_CD3..CD4..CTLA4hi...FreqParent + Live_CD3..CD4..ICOShi...FreqParent, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3875 -1.4936  0.0544  1.5433  7.2670 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          7.28165    0.99552   7.314 2.81e-11 ***
## CohortaPD1                          -0.13305    0.60037  -0.222   0.8250    
## CohortnonPD1                        -0.73458    0.87616  -0.838   0.4034    
## Year                                -1.77286    0.40662  -4.360 2.70e-05 ***
## TimeCategoryoneWeek                 -0.74141    0.42489  -1.745   0.0835 .  
## TimeCategorylate                     0.80584    0.49681   1.622   0.1073    
## Live_CD3..CD4..CTLA4hi...FreqParent  0.20026    0.04975   4.026 9.82e-05 ***
## Live_CD3..CD4..ICOShi...FreqParent   0.09425    0.06602   1.427   0.1560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.13 on 124 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.2901, Adjusted R-squared:   0.25 
## F-statistic: 7.238 on 7 and 124 DF,  p-value: 2.908e-07
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+Ki67hi frequencies averaged", xLabel = NULL, yLabel = "CD4+Ki67hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..Ki67hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..Ki67hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1984 -1.0363 -0.2317  0.6955  9.0800 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4.22087    0.65530   6.441 2.27e-09 ***
## CohortaPD1           0.37847    0.29473   1.284  0.20147    
## CohortnonPD1        -1.07886    0.65569  -1.645  0.10238    
## Year                -0.73860    0.24052  -3.071  0.00262 ** 
## TimeCategoryoneWeek  0.07629    0.32010   0.238  0.81201    
## TimeCategorylate    -0.03257    0.37782  -0.086  0.93145    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.624 on 126 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.08669,    Adjusted R-squared:  0.05045 
## F-statistic: 2.392 on 5 and 126 DF,  p-value: 0.04133

———– Averaged frequencies over time ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(data = subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "Cohort", title = "cTfh response", xLabel = "TimeCategory",
            yLabel = "ICOS+CD38+ (% cTfh)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,2), limits=c(0,18))     
## Warning: Removed 31 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 31 row(s) containing missing values (geom_path).
## Warning: Removed 31 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_PerSubject.pdf")
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
melted <- melted[which(melted$TimeCategory != 'late'),]
prePostTimeAveraged(data = melted, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ (% cTfh)") + scale_y_continuous(breaks=seq(1,10,0.5), limits=c(3.0, 7))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqcTfh.pdf", width=4)
summary( fit <- lm(formula = value ~ Cohort * TimeCategory, data = melted) );  tukey_hsd(fit)       # testing using unpaired two-way ANOVA with Tukey's posthoc
## 
## Call:
## lm(formula = value ~ Cohort * TimeCategory, data = melted)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1862 -1.2541 -0.4574  0.8105  9.3138 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.88741    0.43332   8.971 3.07e-14 ***
## CohortaPD1                      0.05214    0.64668   0.081   0.9359    
## TimeCategoryoneWeek             0.65667    0.61280   1.072   0.2867    
## CohortaPD1:TimeCategoryoneWeek  1.58998    0.92053   1.727   0.0874 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.252 on 93 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1391, Adjusted R-squared:  0.1113 
## F-statistic: 5.009 on 3 and 93 DF,  p-value: 0.002899
## # A tibble: 8 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 Coho~ Healt~ aPD1            0   0.821   -0.0929      1.73 0.0777 
## 2 Time~ basel~ oneWe~          0   1.36     0.453       2.27 0.00371
## 3 Coho~ Healt~ aPD1:~          0   0.0521  -1.64        1.74 1      
## 4 Coho~ Healt~ Healt~          0   0.657   -0.946       2.26 0.708  
## 5 Coho~ Healt~ aPD1:~          0   2.30     0.585       4.01 0.00382
## 6 Coho~ aPD1:~ Healt~          0   0.605   -1.09        2.30 0.786  
## 7 Coho~ aPD1:~ aPD1:~          0   2.25     0.450       4.04 0.00807
## 8 Coho~ Healt~ aPD1:~          0   1.64    -0.0717      3.36 0.0654 
## # ... with 1 more variable: p.adj.signif <chr>
melted %>% group_by(Cohort) %>% kruskal_test(formula = value ~ TimeCategory)                        # testing per cohort using Kruskall-Wallis without adjustment
## # A tibble: 2 x 7
##   Cohort  .y.       n statistic    df       p method        
## * <fct>   <chr> <int>     <dbl> <int>   <dbl> <chr>         
## 1 Healthy value    54      2.09     1 0.149   Kruskal-Wallis
## 2 aPD1    value    51      7.67     1 0.00561 Kruskal-Wallis
index <- melted[melted$TimeCategory=="oneWeek", 'Subject']
melted.paired <- melted[which(melted$Subject %in% index), ]
prePostTimeAveraged(data = melted.paired, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ (% cTfh)") + scale_y_continuous(breaks=seq(1,10,0.5), limits=c(3.0, 7))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqcTfh_paired.pdf", width=4)
# write.csv(melted.paired, file = "cTfh_responses_freqParent.csv")                                  # paired two-way ANOVA results calculated in Prism

#************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_..FreqParent <- subsetData$cTfh_ICOShiCD38hi_..FreqParent * subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
melted <- melted[which(melted$TimeCategory != 'late'),]
prePostTimeAveraged(melted, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ cTfh (% CD4)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0.25,1))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqCD4.pdf", width=4)
summary( fit <- lm(formula = value ~ Cohort * TimeCategory, data = melted) ); tukey_hsd(fit)        # testing using unpaired two-way ANOVA with Tukey's posthoc
## 
## Call:
## lm(formula = value ~ Cohort * TimeCategory, data = melted)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69682 -0.19147 -0.03762  0.10808  1.78139 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.35374    0.06815   5.191 1.22e-06 ***
## CohortaPD1                      0.07838    0.10170   0.771    0.443    
## TimeCategoryoneWeek             0.08764    0.09637   0.909    0.365    
## CohortaPD1:TimeCategoryoneWeek  0.17706    0.14477   1.223    0.224    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3541 on 93 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1139, Adjusted R-squared:  0.08533 
## F-statistic: 3.985 on 3 and 93 DF,  p-value: 0.01018
## # A tibble: 8 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 Coho~ Healt~ aPD1            0  0.164     0.0201     0.308 0.0259 
## 2 Time~ basel~ oneWe~          0  0.166     0.0233     0.309 0.0231 
## 3 Coho~ Healt~ aPD1:~          0  0.0784   -0.188      0.344 0.867  
## 4 Coho~ Healt~ Healt~          0  0.0876   -0.164      0.340 0.8    
## 5 Coho~ Healt~ aPD1:~          0  0.343     0.0736     0.613 0.00672
## 6 Coho~ aPD1:~ Healt~          0  0.00927  -0.257      0.275 1      
## 7 Coho~ aPD1:~ aPD1:~          0  0.265    -0.0179     0.547 0.0748 
## 8 Coho~ Healt~ aPD1:~          0  0.255    -0.0141     0.525 0.0698 
## # ... with 1 more variable: p.adj.signif <chr>
melted %>% group_by(Cohort) %>% kruskal_test(formula = value ~ TimeCategory)                        # testing per cohort using Kruskall-Wallis without adjustment
## # A tibble: 2 x 7
##   Cohort  .y.       n statistic    df     p method        
## * <fct>   <chr> <int>     <dbl> <int> <dbl> <chr>         
## 1 Healthy value    54      2.34     1 0.126 Kruskal-Wallis
## 2 aPD1    value    51      2.34     1 0.126 Kruskal-Wallis
index <- melted[melted$TimeCategory=="oneWeek", 'Subject']
melted.paired <- melted[which(melted$Subject %in% index), ]
prePostTimeAveraged(data = melted.paired, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ cTfh (% CD4+)") + 
  scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0.25,1))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqCD4_paired.pdf", width=4)
# write.csv(melted.paired, file = "cTfh_responses_freqCD4.csv")








subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_..FreqParent"))
prePostTimeAveraged(melted, title = "CD19+ frequencies averaged", xLabel = NULL, yLabel = "CD19+ (freq of CD14-CD4- live)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "CD19_CD27.CD38....FreqParent", fillParam = "Cohort", title = "PB response over time", xLabel = "TimeCategory",
            yLabel = "Plasmablast frequency", groupby = "Subject")  + scale_y_continuous(breaks=seq(0,150,5))     # limits = c(0,45)
## Warning: Removed 31 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 31 row(s) containing missing values (geom_path).

## Warning: Removed 31 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD27hiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "Plasmablast responses averaged", xLabel = NULL, yLabel = "CD27++CD38++ Plasmablast (% nonnaive B)") + scale_y_continuous(breaks=seq(0,99,0.25))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent /100
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD27hiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "Plasmablast responses averaged", xLabel = NULL, yLabel = "CD27++CD38++ Plasmablast (% CD19)") + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
prePostTime(subsetData, xData = "TimeCategory", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam = "Cohort", title = "ASC response by subject", 
            xLabel = "TimeCategory", yLabel = "CD20- ASC (% IgD-CD71+)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5))     
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ASCResp_overTime_PerSubject.pdf", width = 8)
subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"))
prePostTimeAveraged(data = subset(melted, TimeCategory != "late"), title = "ASC", xLabel = NULL, yLabel = "CD20- ASC (% IgD-CD71+)") + 
  scale_y_continuous(breaks=seq(0,99,5), limits = c(30,70))

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ASCResp_overTime_freqCD71.pdf", width=4)
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"))
prePostTimeAveraged(data = subset(melted, TimeCategory != "late"), title = "ASC", xLabel = NULL, yLabel = "CD20- ASC (% CD19)") + 
  scale_y_continuous(breaks=seq(0,99,0.25))

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ASCResp_overTime_freqCD19.pdf", width=4)




subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
prePostTime(subsetData, xData = "TimeCategory", yData = "IgDlo_CD71hi_ActBCells...FreqParent", fillParam = "Cohort", title = "ABC response by subject", 
            xLabel = "TimeCategory", yLabel = "CD20+ ABC (% IgD-CD71+)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5)) 
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_ActBCells...FreqParent"))
prePostTimeAveraged(data = subset(melted, TimeCategory != "late"), title = "ABC", xLabel = NULL, 
                    yLabel = "CD20+ ABC (% CD71+)") + scale_y_continuous(breaks=seq(0,99,5), limits = c(20,60))

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ABCResp_overTime_freqCD71.pdf", width=4)
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_ActBCells...FreqParent"))
prePostTimeAveraged(data = subset(melted, TimeCategory != "late"), title = "ABC", xLabel = NULL, 
                    yLabel = "CD20+ ABC (% CD19)")  + scale_y_continuous(breaks=seq(0,99,0.2), limits = c(0.3,1.5))

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ABCResp_overTime_freqCD19.pdf", width=4)





# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CTLA4hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CTLA4hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CTLA4hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CTLA4hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CTLA4hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CTLA4hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CTLA4hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36819 -0.10116 -0.03021  0.07148  0.94779 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.170147   0.073844   2.304 0.022935 *  
## CohortaPD1           0.121673   0.031914   3.813 0.000219 ***
## Year                -0.014821   0.027246  -0.544 0.587456    
## TimeCategoryoneWeek  0.106015   0.035682   2.971 0.003586 ** 
## TimeCategorylate    -0.006897   0.041759  -0.165 0.869102    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1756 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1716, Adjusted R-squared:  0.144 
## F-statistic: 6.216 on 4 and 120 DF,  p-value: 0.00014
subsetData <- subset(mergedData, Cohort != "nonPD1")
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ Ki67hi frequencies averaged", xLabel = NULL, yLabel = "Ki67hi (% cTfh)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort  + TimeCategory + Year))    # Year is a batch effect
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort + 
##     TimeCategory + Year, data = subsetData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.175  -9.238  -0.138   9.740  39.462 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           60.437      6.137   9.847  < 2e-16 ***
## CohortaPD1             1.274      2.652   0.480 0.631802    
## TimeCategoryoneWeek   -2.954      2.966  -0.996 0.321232    
## TimeCategorylate      -2.678      3.471  -0.772 0.441793    
## Year                  -8.291      2.264  -3.661 0.000374 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.6 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1085, Adjusted R-squared:  0.07878 
## F-statistic: 3.651 on 4 and 120 DF,  p-value: 0.007639
tukey_hsd(aov(data = subsetData, formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort:TimeCategory))  # proportion of Parent
## # A tibble: 15 x 9
##    term  group1 group2 null.value estimate conf.low conf.high p.adj p.adj.signif
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl> <dbl> <chr>       
##  1 Coho~ Healt~ aPD1:~          0    5.06     -7.67     17.8  0.859 ns          
##  2 Coho~ Healt~ Healt~          0    0.819   -11.2      12.9  1     ns          
##  3 Coho~ Healt~ aPD1:~          0   -3.27    -16.2       9.63 0.977 ns          
##  4 Coho~ Healt~ Healt~          0    1.77    -12.8      16.4  0.999 ns          
##  5 Coho~ Healt~ aPD1:~          0   -0.675   -15.3      13.9  1     ns          
##  6 Coho~ aPD1:~ Healt~          0   -4.24    -17.0       8.49 0.928 ns          
##  7 Coho~ aPD1:~ aPD1:~          0   -8.33    -21.9       5.20 0.481 ns          
##  8 Coho~ aPD1:~ Healt~          0   -3.29    -18.4      11.9  0.989 ns          
##  9 Coho~ aPD1:~ aPD1:~          0   -5.73    -20.9       9.42 0.882 ns          
## 10 Coho~ Healt~ aPD1:~          0   -4.09    -17.0       8.81 0.941 ns          
## 11 Coho~ Healt~ Healt~          0    0.949   -13.7      15.6  1     ns          
## 12 Coho~ Healt~ aPD1:~          0   -1.49    -16.1      13.1  1     ns          
## 13 Coho~ aPD1:~ Healt~          0    5.03    -10.3      20.3  0.931 ns          
## 14 Coho~ aPD1:~ aPD1:~          0    2.59    -12.7      17.9  0.996 ns          
## 15 Coho~ Healt~ aPD1:~          0   -2.44    -19.2      14.3  0.998 ns
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_Ki67hi...FreqParent", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW", 
             yLabel="+/+ Ki67hi (% cTfh) ")

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1")
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ Ki67hi frequencies averaged", xLabel = NULL, yLabel = "+/+ Ki67hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort  + TimeCategory + Year))    # Year is a batch effect
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort + 
##     TimeCategory + Year, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43183 -0.09403 -0.02293  0.05632  1.12069 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.52583    0.08240   6.382 3.43e-09 ***
## CohortaPD1           0.10450    0.03561   2.934   0.0040 ** 
## TimeCategoryoneWeek  0.08498    0.03982   2.134   0.0348 *  
## TimeCategorylate    -0.04207    0.04660  -0.903   0.3684    
## Year                -0.15825    0.03040  -5.205 8.11e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.196 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.2396, Adjusted R-squared:  0.2142 
## F-statistic: 9.451 on 4 and 120 DF,  p-value: 1.123e-06
tukey_hsd(aov(data = subsetData, formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort:TimeCategory))  # proportion of CD4 not FreqParent
## # A tibble: 15 x 9
##    term  group1 group2 null.value estimate conf.low conf.high  p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>  <dbl>
##  1 Coho~ Healt~ aPD1:~          0  0.0591  -0.121      0.239  0.932 
##  2 Coho~ Healt~ Healt~          0  0.0423  -0.128      0.213  0.979 
##  3 Coho~ Healt~ aPD1:~          0  0.186    0.00387    0.368  0.0423
##  4 Coho~ Healt~ Healt~          0  0.00742 -0.199      0.213  1     
##  5 Coho~ Healt~ aPD1:~          0  0.00610 -0.200      0.212  1     
##  6 Coho~ aPD1:~ Healt~          0 -0.0168  -0.196      0.163  1     
##  7 Coho~ aPD1:~ aPD1:~          0  0.127   -0.0641     0.318  0.393 
##  8 Coho~ aPD1:~ Healt~          0 -0.0517  -0.266      0.162  0.982 
##  9 Coho~ aPD1:~ aPD1:~          0 -0.0530  -0.267      0.161  0.979 
## 10 Coho~ Healt~ aPD1:~          0  0.144   -0.0385     0.326  0.209 
## 11 Coho~ Healt~ Healt~          0 -0.0349  -0.241      0.171  0.996 
## 12 Coho~ Healt~ aPD1:~          0 -0.0362  -0.242      0.170  0.996 
## 13 Coho~ aPD1:~ Healt~          0 -0.178   -0.394      0.0374 0.166 
## 14 Coho~ aPD1:~ aPD1:~          0 -0.180   -0.396      0.0361 0.16  
## 15 Coho~ Healt~ aPD1:~          0 -0.00131 -0.238      0.235  1     
## # ... with 1 more variable: p.adj.signif <chr>
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_Ki67hi...FreqParent", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW", 
             yLabel="+/+ Ki67hi (freq CD4) ")   # this is really just reflective of the +/+ frequency difference between cohorts
## Warning: Removed 31 rows containing missing values (geom_point).

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_cTfh..._medfi.Ki67.", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW", 
             yLabel="+/+ for medianFI of Ki67")  
## Warning: Removed 80 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_cTfh..._medfi.CD25."))
prePostTimeAveraged(melted, title = "+/+ medFI CD25", xLabel = NULL, yLabel = "medianFI CD25 in ICOS+CD38+ cTfh") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="cTfh_ICOShiCD38hi_cTfh..._medfi.CD25.", fillParam="Cohort", 
             title="medianFI CD25 in ICOS+CD38+ cTfh at baseline", yLabel="medianFI CD25 in ICOS+CD38+ cTfh", position="left")  
## Warning: Removed 26 rows containing missing values (geom_point).

univScatter(data = subset(subsetData, TimeCategory == "baseline"), yData = "cTfh_ICOShiCD38hi_cTfh..._medfi.CD25.", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs +/+ medFI CD25", yLabel= "medianFI CD25 in ICOS+CD38+ cTfh", xLabel = "Cycle of immunotherapy") #+ scale_y_continuous(limits= c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 41 rows containing non-finite values (stat_smooth).
## Warning: Removed 41 rows containing missing values (geom_point).

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_cTfh..._medfi.CD25. ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_cTfh..._medfi.CD25. ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -194.397  -59.213   -0.829   55.322  177.784 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            87.22      18.43   4.733 1.08e-05 ***
## CohortaPD1            116.22      20.08   5.787 1.73e-07 ***
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek    10.77      21.55   0.500  0.61862    
## TimeCategorylate       77.82      27.85   2.794  0.00666 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 84.83 on 72 degrees of freedom
##   (80 observations deleted due to missingness)
## Multiple R-squared:  0.4138, Adjusted R-squared:  0.3894 
## F-statistic: 16.94 on 3 and 72 DF,  p-value: 2.003e-08
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CD25hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD25+ ICOS+CD38+ cTfh", xLabel = NULL, yLabel = "CD25+ (% ICOS+CD38+ cTfh)") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="cTfh_ICOShiCD38hi_CD25hi...FreqParent", fillParam="Cohort", 
             title="CD25+ ICOS+CD38+ cTfh at baseline", yLabel="CD25+ (% ICOS+CD38+ cTfh)", position="left")  
## Warning: Removed 8 rows containing missing values (geom_point).

univScatter(data = subset(subsetData, TimeCategory == "baseline"), yData = "cTfh_ICOShiCD38hi_CD25hi...FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs +/+ CD25+", yLabel= "CD25+ (% ICOS+CD38+ cTfh)", xLabel = "Cycle of immunotherapy") #+ scale_y_continuous(limits= c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 38 rows containing non-finite values (stat_smooth).
## Warning: Removed 38 rows containing missing values (geom_point).

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.883 -3.674 -1.379  2.554 21.865 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           1.1562     2.2431   0.515   0.6072  
## CohortaPD1            0.9674     0.9694   0.998   0.3203  
## Year                  1.5887     0.8276   1.920   0.0573 .
## TimeCategoryoneWeek   0.6567     1.0839   0.606   0.5457  
## TimeCategorylate      2.2016     1.2685   1.736   0.0852 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.335 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.06294,    Adjusted R-squared:  0.03171 
## F-statistic: 2.015 on 4 and 120 DF,  p-value: 0.09662
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"))
prePostTimeAveraged(melted, title = "Foxp3+ ICOS+CD38+ cTfh", xLabel = NULL, yLabel = "Foxp3+ (% ICOS+CD38+ cTfh)") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="cTfh_ICOShiCD38hi_Foxp3hi...FreqParent", fillParam="Cohort", 
             title="Foxp3+ ICOS+CD38+ cTfh at baseline", yLabel="Foxp3+ (% ICOS+CD38+ cTfh)", position="left")  
## Warning: Removed 8 rows containing missing values (geom_point).

twoSampleBar(data=subset(subsetData, TimeCategory == "oneWeek"), xData="Cohort", yData="cTfh_ICOShiCD38hi_Foxp3hi...FreqParent", fillParam="Cohort", 
             title="Foxp3+ ICOS+CD38+ cTfh at baseline", yLabel="Foxp3+ (% ICOS+CD38+ cTfh)", position="left")  

univScatter(data = subset(subsetData, TimeCategory == "baseline"), yData = "cTfh_ICOShiCD38hi_Foxp3hi...FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs +/+ Foxp3+", yLabel= "Foxp3+ (% ICOS+CD38+ cTfh)", xLabel = "Cycle of immunotherapy") #+ scale_y_continuous(limits= c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 38 rows containing non-finite values (stat_smooth).

## Warning: Removed 38 rows containing missing values (geom_point).

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_Foxp3hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.4612  -4.3036  -0.0222   4.1577  15.1291 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          16.3450     2.7565   5.930    3e-08 ***
## CohortaPD1           -0.5286     1.1913  -0.444  0.65804    
## Year                 -0.2870     1.0171  -0.282  0.77828    
## TimeCategoryoneWeek  -4.4617     1.3320  -3.350  0.00108 ** 
## TimeCategorylate      1.0659     1.5588   0.684  0.49543    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.556 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1241, Adjusted R-squared:  0.09489 
## F-statistic:  4.25 on 4 and 120 DF,  p-value: 0.00298
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CD25.Foxp3....FreqParent"))
prePostTimeAveraged(melted, title = "CD25+Foxp3+ ICOS+CD38+ cTfh", xLabel = NULL, yLabel = "CD25+Foxp3+ (% ICOS+CD38+ cTfh)") # + scale_y_continuous(breaks=seq(0,99,0.25))

twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="cTfh_ICOShiCD38hi_CD25.Foxp3....FreqParent", fillParam="Cohort", 
             title="CD25+Foxp3+ ICOS+CD38+ cTfh at baseline", yLabel="CD25+Foxp3+ (% ICOS+CD38+ cTfh)", position="left")  
## Warning: Removed 8 rows containing missing values (geom_point).

univScatter(data = subset(subsetData, TimeCategory == "baseline"), yData = "cTfh_ICOShiCD38hi_CD25.Foxp3....FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs +/+ CD25+Foxp3+", yLabel= "CD25+Foxp3+ (% ICOS+CD38+ cTfh)", xLabel = "Cycle of immunotherapy") #+ scale_y_continuous(limits= c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 38 rows containing non-finite values (stat_smooth).

## Warning: Removed 38 rows containing missing values (geom_point).

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.883 -3.674 -1.379  2.554 21.865 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           1.1562     2.2431   0.515   0.6072  
## CohortaPD1            0.9674     0.9694   0.998   0.3203  
## Year                  1.5887     0.8276   1.920   0.0573 .
## TimeCategoryoneWeek   0.6567     1.0839   0.606   0.5457  
## TimeCategorylate      2.2016     1.2685   1.736   0.0852 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.335 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.06294,    Adjusted R-squared:  0.03171 
## F-statistic: 2.015 on 4 and 120 DF,  p-value: 0.09662
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CXCR3hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CXCR3hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35024 -0.09508 -0.02162  0.05517  1.10230 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.234988   0.077514   3.032 0.002981 ** 
## CohortaPD1           0.086057   0.033500   2.569 0.011428 *  
## Year                -0.059176   0.028600  -2.069 0.040684 *  
## TimeCategoryoneWeek  0.147541   0.037456   3.939 0.000138 ***
## TimeCategorylate    -0.004335   0.043834  -0.099 0.921388    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1844 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1847, Adjusted R-squared:  0.1575 
## F-statistic: 6.794 on 4 and 120 DF,  p-value: 5.79e-05
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CD25hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CD25hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CD25hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CD25hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CD25hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.041703 -0.019045 -0.005201  0.006227  0.173347 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          0.029688   0.013646   2.176   0.0315 *
## CohortaPD1           0.009247   0.005897   1.568   0.1195  
## Year                -0.005172   0.005035  -1.027   0.3063  
## TimeCategoryoneWeek  0.013112   0.006594   1.989   0.0490 *
## TimeCategorylate     0.008206   0.007717   1.063   0.2897  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03246 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.05569,    Adjusted R-squared:  0.02422 
## F-statistic: 1.769 on 4 and 120 DF,  p-value: 0.1395

———– Cellular subset frequencies at each time point ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="cTfh +/+ at d0", yLabel="ICOS+CD38+ cTfh frequency at d0")
## Warning: Removed 8 rows containing missing values (geom_point).

# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 0)
# twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh (% CXCR5+)") + 
#   coord_cartesian(ylim = c(0.5,11)) + scale_y_continuous(breaks=seq(1:12))
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_justDay7_AllYrs.pdf", device='pdf', width=7, height=7)

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## Warning: Removed 18 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 100
twoSampleBar(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: PB (freq CD19) at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## Warning: Removed 18 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="IgDloCD71+CD20lo frequency at d7")
## Warning: Removed 18 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="IgDloCD71+CD20lo frequency at d7")
## Warning: Removed 18 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d7", yLabel="ABC frequency at d7")
## Warning: Removed 18 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d7", yLabel="ABC frequency at d7")
## Warning: Removed 18 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
temp <- list()
plotElements <- c("cTfh_ICOShiCD38hi_cTfh..._medfi.SLAM.", "cTfh_ICOShiCD38hi_cTfh..._medfi.CD127.", "cTfh_ICOShiCD38hi_cTfh..._medfi.CXCR5.", "cTfh_ICOShiCD38hi_cTfh..._medfi.Ki67.",
                  "cTfh_ICOShiCD38hi_cTfh..._medfi.PD1.", "cTfh_ICOShiCD38hi_cTfh..._medfi.aIgG4..batchEffect", "cTfh_ICOShiCD38hi_cTfh..._medfi.CD25.", "cTfh_ICOShiCD38hi_cTfh..._medfi.CXCR3.",
                  "cTfh_ICOShiCD38hi_cTfh..._medfi.ICOS.", "cTfh_ICOShiCD38hi_cTfh..._medfi.CD27..batchEffect", "cTfh_ICOShiCD38hi_cTfh..._medfi.CTLA4..batchEffect",   "cTfh_ICOShiCD38hi_cTfh..._medfi.CD38." ,
                  "cTfh_ICOShiCD38hi_cTfh..._medfi.CCR6.",  "cTfh_ICOShiCD38hi_cTfh..._medfi.CD28.", "cTfh_ICOShiCD38hi_cTfh..._medfi.CXCR4.", "cTfh_ICOShiCD38hi_cTfh..._medfi.Foxp3." )
plotNames <- c("SLAM", "CD127","CXCR5","Ki67",
               "PD-1", "aIgG4", "CD25","CXCR3", 
               "ICOS", "CD27", "CTLA4", "CD38",
               "CCR6", "CD28", "CXCR4", "Foxp3")
# for (i in 1:length(plotElements))  { ggsave(twoSampleBar(data=subsetData, xData="Cohort", yData=plotElements[i], fillParam="Cohort", title=plotNames[i], yLabel="medianFI"), filename = paste0("D:/Pembro-Fluvac/Analysis/Images/cTfhPhenotype_",plotNames[i], ".pdf"), width=4, height=5) }

———– Tfh and PB Fold-change at day 7 ——————–

subsetData <- mergedData[,c("cTfh_ICOShiCD38hi_..FreqParent","Subject","TimeCategory")]
temp <- str_split(subsetData$Subject,"-", simplify=T); subsetData <- as.data.frame(cbind(subsetData,temp))
names(subsetData) <- c("cTfhfreq","FullLabel", "TimeCategory","Cohort","Subject")
data_wide <-  pivot_wider(data = subsetData, names_from = c('TimeCategory') , values_from = c("Subject", "cTfhfreq"))
ggplot(data_wide, aes(x=cTfhfreq_baseline, y=cTfhfreq_oneWeek, label=FullLabel)) + geom_point() + geom_label(size=3) + theme_bw() 
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_label).

cor.test(x = data_wide$cTfhfreq_baseline, y=data_wide$cTfhfreq_oneWeek, use="complete.obs", method='kendall')
## 
##  Kendall's rank correlation tau
## 
## data:  data_wide$cTfhfreq_baseline and data_wide$cTfhfreq_oneWeek
## z = 2.364, p-value = 0.01808
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##       tau 
## 0.2287739
subsetData <- subset(mergedData, TimeCategory == "oneWeek" & Cohort != "nonPD1" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBar(data=subsetData, xData="Cohort", yData="FCtfh_oW", fillParam="Cohort", title="cTfh", yLabel="Fold-change at one week", FCplot=T, ttest = F) + 
  scale_y_continuous(breaks=seq(0,99,1)) + theme(axis.title.y = element_text(vjust=1.9))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_foldchange_AllYrs.pdf", device="pdf", width=3.5)
subsetData %>% group_by(Cohort) %>% shapiro_test(FCtfh_oW)
## # A tibble: 2 x 4
##   Cohort  variable statistic        p
##   <fct>   <chr>        <dbl>    <dbl>
## 1 Healthy FCtfh_oW     0.988 0.982   
## 2 aPD1    FCtfh_oW     0.741 0.000130
wilcox_test(subsetData, FCtfh_oW ~ Cohort)
## # A tibble: 1 x 7
##   .y.      group1  group2    n1    n2 statistic      p
## * <chr>    <chr>   <chr>  <int> <int>     <dbl>  <dbl>
## 1 FCtfh_oW Healthy aPD1      27    20       180 0.0535
# aggregate( FC_Tfhresponse, by= list( FC_Tfhresponse$Cohort), FUN=mean, na.rm = T)               # orphan calculation, not sure what this goes to?

subsetData <- subset(mergedData, TimeCategory != "late"  & Cohort != "nonPD1")
FC_PBresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_PBresponse$FC <- FC_PBresponse$`oneWeek`/FC_PBresponse$`baseline`
twoSampleBar(data=FC_PBresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="Plasmablasts", yLabel="Fold-change at one week (% IgD-)",
             FCplot=T) +   scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 27 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/PBresponse_foldchange_AllYrs.pdf", device="pdf", width=4.1, height=7)

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Year == "3" & Cohort != "nonPD1")
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent /100
FC_PBresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_PBresponse$FC <- FC_PBresponse$`oneWeek`/FC_PBresponse$`baseline`
twoSampleBox(data=FC_PBresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="PB response (freq CD19) at one week", yLabel="CD19+CD27++CD38++ fold-change") + 
  scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0)
FC_ASCresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_ASCresponse$FC <- FC_ASCresponse$`oneWeek`/FC_ASCresponse$`baseline`
twoSampleBar(data=FC_ASCresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="ASC", yLabel="ASC fold-change (% CD71+)", FCplot = T) + 
  scale_y_continuous(breaks=seq(0,99,1), limits = c(0,3)) 
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ASCresponse_foldchange_AllYrs.pdf", device="pdf", width=4, height=7)

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0)
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_ASCresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_ASCresponse$FC <- FC_ASCresponse$`oneWeek`/FC_ASCresponse$`baseline`
twoSampleBar(data=FC_ASCresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="ASC (freq CD19)", yLabel="ASC fold-change (% CD19)", FCplot=T) + 
  scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0)
FC_ABCresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_ABCresponse$FC <- FC_ABCresponse$`oneWeek`/FC_ABCresponse$`baseline`
twoSampleBar(data=FC_ABCresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="ABC", yLabel="ABC fold-change (% CD71+)", FCplot=T) + 
  scale_y_continuous(limits = c(0,4))
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ABCresponse_foldchange_AllYrs.pdf", device="pdf", width=4, height=7)

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0)
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_ABCresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_ABCresponse$FC <- FC_ABCresponse$`oneWeek`/FC_ABCresponse$`baseline`
twoSampleBar(data=FC_ABCresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="ABC (freq CD19)", yLabel="ABC fold-change (% CD19)")
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (freqParent) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "CD19+CD27++CD38++ PB fold-change") + scale_y_continuous(limits=c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1"  )
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "CD19+CD27++CD38++ PB fold-change") + scale_y_continuous(limits=c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1"  & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0 )
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20lo PB fold-change")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0 )
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20lo PB fold-change") + scale_y_continuous(limits=c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1"  & IgDlo_CD71hi_ActBCells...FreqParent > 0 )
FC_response <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_response$FC_Tfh <- FC_response$`oneWeek`/FC_response$`baseline`
FC_response2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_response$ABC <- FC_response2$`oneWeek`/FC_response2$`baseline`
bivScatter(data1 = FC_response[which(FC_response$Cohort == "Healthy"),], data2 = FC_response[ which(FC_response$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "ABC", fillParam = "Cohort", title = "FC ABC vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20hi ABC fold-change") + scale_y_continuous(limits=c(0,4))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_smooth).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0 )
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_response <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_response$FC_Tfh <- FC_response$`oneWeek`/FC_response$`baseline`
FC_response2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_response$ABC <- FC_response2$`oneWeek`/FC_response2$`baseline`
bivScatter(data1 = FC_response[which(FC_response$Cohort == "Healthy"),], data2 = FC_response[ which(FC_response$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "ABC", fillParam = "Cohort", title = "FC ABC (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20hi ABC fold-change") + scale_y_continuous(limits=c(0,12))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

———– PB vs cTfh response correlations ——————–

## ***************    CD27++CD38++     ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
univScatter(data = oneWeek, yData = "CD27hiCD38hi_..FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD27++CD38++ at oneWeek", yLabel= "CD27++CD38++ (freq IgD-)", xLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

subsetData1 <- subset(oneWeek, Cohort == "Healthy" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1")    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "CD27hiCD38hi_..FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "cTfh vs PB response at oW", yLabel= "CD27++CD38++ (% IgD-CD19+)", xLabel = "ICOS+CD38+ (% cTfh)") + 
  scale_x_continuous(breaks=seq(0,99,1),limits=c(0,12)) + scale_y_continuous(breaks=seq(0,99,2),limits=c(0,15))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_smooth).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD71.pdf", device="pdf", width=9, height=7)

oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
univScatter(data = oneWeek, yData = "FCPB_oW", xData = "FCtfh_oW", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD27++CD38++ at oneWeek", yLabel= "CD27++CD38++ (freq IgD-)", xLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

subsetData1 <- subset(oneWeek, Cohort == "Healthy" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1")    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "FCPB_oW", xData="FCtfh_oW", 
           fillParam = "Cohort", title = "cTfh vs PB response", yLabel= "Fold-change CD27++CD38++ PB", xLabel = "Fold-change ICOS+CD38+ cTfh") + 
  scale_x_continuous(breaks=seq(0,99,1),limits=c(0,7)) + scale_y_continuous(breaks=seq(0,99,2),limits=c(0,15))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-vs-PB_foldchange.pdf", device="pdf", width=9, height=7)



## ***************    CD71+ CD20loASC    ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )   
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  )    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "Cellular response at one week", yLabel= "ASC (% CD71)", xLabel = "ICOS+CD38+ (% cTfh)") + 
  coord_cartesian(xlim=c(0,12),ylim=c(0,100))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ASC-71hi-20lo-d7_freqCD71_biv.pdf", device="pdf")
univScatter(data = oneWeek, yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs ASC at one week", yLabel= "ASC (% CD71+)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim=c(0,12),ylim=c(0,100))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ASC-71hi-20lo-d7_freqCD71_univ.pdf", device="pdf")

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )   
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  )    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs CD20lo response at oneWeek", yLabel= "ASC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim = c(0,12), ylim=c(0,3))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ASC-71hi-20lo-d7_freqCD19_biv.pdf", device="pdf")
univScatter(data = oneWeek, yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs ASC at oneWeek", yLabel= "ASC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim = c(0,12), ylim=c(0,3))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ASC-71hi-20lo-d7_freqCD19_univ.pdf", device="pdf")
  

## ***************    CD71+ CD20hi ABC    ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy"  )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1" ) 
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = " cTfh vs ABC at oneWeek", yLabel= "ABC (% CD71+)", xLabel = "ICOS+CD38+ (% cTfh)") + 
  scale_x_continuous(breaks=seq(0,99,1), limits=c(0,10)) + scale_y_continuous(breaks=seq(0,100,25), limits = c(0,100))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD71_biv.pdf", device="pdf")
univScatter(data = oneWeek, yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs ABC at oneWeek", yLabel= "ABC (% CD71+)", xLabel = "ICOS+CD38+ (% cTfh)")  + 
  scale_x_continuous(breaks=seq(0,99,1), limits=c(0,10)) + scale_y_continuous(breaks=seq(0,100,25), limits = c(0,100))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 23 rows containing non-finite values (stat_smooth).
## Warning: Removed 23 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD71_univ.pdf", device="pdf")

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <-  oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000

subsetData1 <- subset(oneWeek, Cohort == "Healthy"   )   
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs ABC at oneWeek", yLabel= "ABC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim=c(0,12),ylim=c(0,3))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD19_biv.pdf", device="pdf")
univScatter(data = oneWeek, yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD20hi at oneWeek", yLabel= "ABC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim=c(0,12),ylim=c(0,3))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD19_univ.pdf", device="pdf")




# ************** correlations to Tfh responses *********************
baseline <- subset(mergedData, TimeCategory == "baseline")
baseline$IgDlo_CD71hi_ActBCells...FreqParent <-  baseline$IgDlo_CD71hi_ActBCells...FreqParent * baseline$IgDlo_CD71hi_..FreqParent * baseline$CD19hi_NotNaiveB_freqParent / 10000
baseline$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  baseline$IgDlo_CD71hi_CD20loCD71hi...FreqParent * baseline$IgDlo_CD71hi_..FreqParent * baseline$CD19hi_NotNaiveB_freqParent / 10000

subsetData1 <- subset(baseline, Cohort == "Healthy"   )   
subsetData2 <- subset(baseline, Cohort == "aPD1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "ABC at baseline", yLabel= "ABC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim=c(0,7.5),ylim=c(0,3))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d0_vs_ABC-71hi-20hi-d0_freqCD19_biv.pdf", device="pdf")


subsetData1 <- subset(baseline, Cohort == "Healthy"   )   
subsetData2 <- subset(baseline, Cohort == "aPD1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "ASC at baseline", yLabel= "ASC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)") + coord_cartesian(xlim=c(0,7.5),ylim=c(0,3))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d0_vs_ASC-71hi-20lo-d0_freqCD19_biv.pdf", device="pdf")

cTfhCorrel <- function( data, subset )
{
  z <- vector()
  z[1] <- subset
  z[2] <- round(cor(data[ which(data$Cohort == "Healthy"), subset], data[ which(data$Cohort == "Healthy") ,"cTfh_ICOShiCD38hi_..FreqParent"], 
                    method = "pearson", use = "complete.obs"), 2)
  z[3] <- round(cor(data[ which(data$Cohort == "aPD1"), subset], data[ which(data$Cohort == "aPD1"),"cTfh_ICOShiCD38hi_..FreqParent"], 
                    method = "pearson", use = "complete.obs"), 2)
  return(z)
}
result <- data.frame(CellType = 0, Healthy = 0, aPD1 = 0)
result[1,] <- cTfhCorrel(baseline, subset = "IgDlo_CD71hi_ActBCells...FreqParent")
result[2,] <- cTfhCorrel(baseline, subset = "IgDlo_CD71hi_CD20loCD71hi...FreqParent")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_ActBCells...FreqParent", "ABC \n(% CD19)")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_CD20loCD71hi...FreqParent", "ASC \n(% CD19)")
result <- melt(result, id.vars = "CellType", measure.vars = c("Healthy","aPD1")); result$value <- as.numeric(result$value)
result$CellType <- factor(result$CellType, levels = c("ASC \n(% CD19)",  "ABC \n(% CD19)"))
ggplot(result, aes(x=CellType, fill=variable, color="bl")) + geom_bar(aes(y = value), stat='identity', position="dodge" ) + theme_bw()  + 
  scale_fill_manual(values=c("#7FAEDB", "#FFB18C")) + scale_color_manual(values="black") + ggtitle("Correlation with \nTfh responses at baseline") + 
  ylab("Pearson r") + 
  theme(axis.text.x = element_text(angle = 0), axis.title = element_text(size=16,hjust = 0.5), plot.title = element_text(size=18,hjust = 0.5), 
        axis.title.x = element_blank(), axis.text = element_text(size=16, color="black"), legend.position = "none") + 
  scale_y_continuous(breaks = seq(-1,1,0.1), limits=c(-0.2,0.6))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d0_vs_various-d0_pearsons_freqCD19.pdf", device = "pdf", width=4, height=6)



oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <-  oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000

cTfhCorrel <- function( data, subset )
{
  z <- vector()
  z[1] <- subset
  z[2] <- round(cor(data[ which(data$Cohort == "Healthy"), subset], data[ which(data$Cohort == "Healthy") ,"cTfh_ICOShiCD38hi_..FreqParent"], 
                    method = "pearson", use = "complete.obs"), 2)
  z[3] <- round(cor(data[ which(data$Cohort == "aPD1"), subset], data[ which(data$Cohort == "aPD1"),"cTfh_ICOShiCD38hi_..FreqParent"], 
                    method = "pearson", use = "complete.obs"), 2)
  return(z)
}
result <- data.frame(CellType = 0, Healthy = 0, aPD1 = 0)
result[1,] <- cTfhCorrel(oneWeek, subset = "IgDlo_CD71hi_ActBCells...FreqParent")
result[2,] <- cTfhCorrel(oneWeek, subset = "IgDlo_CD71hi_CD20loCD71hi...FreqParent")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_ActBCells...FreqParent", "ABC \n(% CD19)")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_CD20loCD71hi...FreqParent", "ASC \n(% CD19)")
result <- melt(result, id.vars = "CellType", measure.vars = c("Healthy","aPD1")); result$value <- as.numeric(result$value)
result$CellType <- factor(result$CellType, levels = c("ASC \n(% CD19)",  "ABC \n(% CD19)"))
ggplot(result, aes(x=CellType, fill=variable, color="bl")) + geom_bar(aes(y = value), stat='identity', position="dodge" ) + theme_bw()  + 
  scale_fill_manual(values=c("#7FAEDB", "#FFB18C")) + scale_color_manual(values="black") + ggtitle("Correlation with \nTfh responses at one week") + 
  ylab("Pearson r") + 
  theme(axis.text.x = element_text(angle = 0), axis.title = element_text(size=16,hjust = 0.5), plot.title = element_text(size=18,hjust = 0.5), 
        axis.title.x = element_blank(), axis.text = element_text(size=16, color="black"), legend.position = "none") + 
  scale_y_continuous(breaks = seq(-1,1,0.1), limits=c(-0.2,0.6))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_various-d7_pearsons_freqCD19.pdf", device = "pdf", width=4, height=6)

———– Vaccine response determinants and biomarkers ——————–

# correlate HAI vs cycle of IT
subsetData <- subset(mergedData,  cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
univScatter(data = subsetData, yData = "cTfh_ICOShiCD38hi_..FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs +/+ cTfh at bL", yLabel= "ICOS+CD38+ cTfh freq", xLabel = "Cycle of immunotherapy") + coord_cartesian(ylim= c(0,8))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 104 rows containing non-finite values (stat_smooth).
## Warning: Removed 104 rows containing missing values (geom_point).

summary(fit <- lm(Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, subsetData))
## 
## Call:
## lm(formula = Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, 
##     data = subsetData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.727 -4.732 -1.904  2.718 18.028 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      19.914      7.002   2.844   0.0117 *
## cTfh_ICOShiCD38hi_..FreqParent   -2.557      1.787  -1.431   0.1717  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.529 on 16 degrees of freedom
##   (104 observations deleted due to missingness)
## Multiple R-squared:  0.1135, Adjusted R-squared:  0.05805 
## F-statistic: 2.048 on 1 and 16 DF,  p-value: 0.1717
univScatter(data = subsetData, yData = "FCtfh_oW", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "cTfh responses", yLabel= "ICOS+CD38+ cTfh FoldChange", xLabel = "Cycle of immunotherapy")  + scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 105 rows containing non-finite values (stat_smooth).
## Warning: Removed 105 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CycleIT_vs_cTfhFC.pdf", device='pdf')

univScatter(data = subsetData, yData = "FChai_late", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "HAI fold-change", yLabel= "H1N1pdm09 HAI fold-change", xLabel = "Cycle of immunotherapy")  + coord_cartesian(ylim = c(0,5)) + 
  scale_x_continuous (breaks=seq(0,40,5))+ scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 108 rows containing non-finite values (stat_smooth).
## Warning: Removed 108 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CycleIT_vs_HAItiterFC.pdf", device='pdf', width=6, height=6)

summary(fit <- lm(FChai_late ~ FCtfh_oW, data = mergedData))
## 
## Call:
## lm(formula = FChai_late ~ FCtfh_oW, data = mergedData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71393 -0.30509 -0.16094  0.09187  1.67824 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.26578    0.07655  16.535   <2e-16 ***
## FCtfh_oW     0.03011    0.04421   0.681    0.497    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4708 on 127 degrees of freedom
##   (44 observations deleted due to missingness)
## Multiple R-squared:  0.003641,   Adjusted R-squared:  -0.004205 
## F-statistic: 0.464 on 1 and 127 DF,  p-value: 0.497
bivScatter(data1 = subset(mergedData, Cohort == "Healthy"), data2 = subset(mergedData, Cohort == "aPD1"), 
           name1 = "Healthy", name2 = "aPD1", xData = "FCtfh_oW", yData = "FChai_late", fillParam = "Cohort", title = "FC HAI vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "H1N1pdm09 titer fold-change")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 27 rows containing non-finite values (stat_smooth).
## Warning: Removed 27 rows containing missing values (geom_point).

# if we take away the sample with the highest Cook's distance mergedData  
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit))) 
subsetData <- mergedData[ - as.numeric(names(outlier)), ]
bivScatter(data1 = subset(subsetData, Cohort == "Healthy"), data2 = subset(subsetData, Cohort == "aPD1") , 
           name1 = "Healthy", name2 = "aPD1", xData = "FCtfh_oW", yData = "FChai_late", fillParam = "Cohort", title = "FC HAI vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "H1N1pdm09 titer fold-change") #+ scale_y_continuous(trans = "log2", breaks = c(1,2^(1:8))) + coord_cartesian(ylim=c(0.1,128), xlim = c(0,7))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 27 rows containing non-finite values (stat_smooth).

## Warning: Removed 27 rows containing missing values (geom_point).

summary(fit <- lm(FChai_late ~ FCtfh_oW   + Cohort,   data = mergedData))
## 
## Call:
## lm(formula = FChai_late ~ FCtfh_oW + Cohort, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6629 -0.1863 -0.0275  0.1152  1.3773 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.19797    0.06612  18.119  < 2e-16 ***
## FCtfh_oW    -0.06069    0.03997  -1.518    0.131    
## CohortaPD1   0.53752    0.07753   6.933 1.91e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4021 on 126 degrees of freedom
##   (44 observations deleted due to missingness)
## Multiple R-squared:  0.2788, Adjusted R-squared:  0.2673 
## F-statistic: 24.35 on 2 and 126 DF,  p-value: 1.145e-09
bivScatter(data1 = subset(mergedData, Cohort == "Healthy"), data2 = subset(mergedData, Cohort == "aPD1") , 
           name1 = "Healthy", name2 = "aPD1", xData = "FCPB_oW", yData = "FChai_late", fillParam = "Cohort", title = "FC HAI vs FC PB", xLabel = "PB fold-change", 
           yLabel = "H1N1pdm09 titer fold-change") #+ scale_y_continuous(trans = "log2", breaks = c(1,2^(1:8))) + coord_cartesian(ylim=c(0.1,128), xlim = c(0,7))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 39 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (stat_smooth).
## Warning: Removed 39 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).

summary(fit <- lm(FChai_late ~ FCPB_oW   + Cohort,   data = mergedData))
## 
## Call:
## lm(formula = FChai_late ~ FCPB_oW + Cohort, data = mergedData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.88457 -0.23732 -0.04898  0.27767  1.20308 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.98161    0.08930  10.992  < 2e-16 ***
## FCPB_oW      0.04754    0.02336   2.035   0.0453 *  
## CohortaPD1   0.45625    0.10801   4.224 6.44e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4725 on 78 degrees of freedom
##   (92 observations deleted due to missingness)
## Multiple R-squared:  0.2601, Adjusted R-squared:  0.2412 
## F-statistic: 13.71 on 2 and 78 DF,  p-value: 7.887e-06
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("H1N1pdm09.HAI.titer")); melted <- melted[ which( !is.na(melted$value) ),]
overTime <- aggregate( melted$value, by= list(melted$variable, melted$TimeCategory, melted$Cohort), FUN=mean, na.rm = T); overTime
##               Group.1  Group.2 Group.3        x
## 1 H1N1pdm09.HAI.titer baseline Healthy 289.4815
## 2 H1N1pdm09.HAI.titer  oneWeek Healthy 352.0000
## 3 H1N1pdm09.HAI.titer     late Healthy 402.0741
## 4 H1N1pdm09.HAI.titer baseline    aPD1 129.3333
## 5 H1N1pdm09.HAI.titer  oneWeek    aPD1 265.6000
## 6 H1N1pdm09.HAI.titer     late    aPD1 420.0000
aPD1_seroprot <- length(which(melted$value > 40 & melted$Cohort == "aPD1" & melted$TimeCategory == "late") )
aPD1_total <- length(which(melted$Cohort == "aPD1" & melted$TimeCategory == "late") )
HC_seroprot <- length(which(melted$value > 40 & melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
HC_total <- length(which(melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
continTable <- matrix(c(aPD1_seroprot, aPD1_total - aPD1_seroprot, HC_seroprot, HC_total - HC_seroprot), ncol=2); continTable
##      [,1] [,2]
## [1,]   23   23
## [2,]    1    4
fisher.test(continTable)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  continTable
## p-value = 0.3539
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##    0.3507695 205.1583228
## sample estimates:
## odds ratio 
##   3.904467
seroprot <- data.frame( Cohort = c("Healthy", "aPD1"), Seroprot = c(HC_seroprot / HC_total, aPD1_seroprot / aPD1_total))
seroprot$Cohort <- factor(seroprot$Cohort, levels = c("Healthy", "aPD1"))
ggplot(data=seroprot, aes(x=Cohort, y=Seroprot, fill=Cohort,width=0.6)) + scale_fill_manual(values = c("#7FAEDB", "#FFB18C")) +  
  geom_bar( position = position_dodge(), stat = "identity", color="black",size=0.1) + ggtitle("Seroprotection") + ylab("Proportion seroprotected") +  theme_bw() +
  theme(axis.text = element_text(size=28,hjust = 0.5,color="black"), axis.title = element_text(size=28,hjust = 0.5), axis.title.x = element_blank(), plot.title = element_text(size=32,hjust = 0.5)) + 
  theme(legend.position = "none", axis.text.x = element_text(angle=45,hjust=1,vjust=1)) + 
  coord_cartesian(ylim = c(0,1)) + scale_y_continuous(breaks = seq(0,1,0.1))

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/Seroprotection_byCohort.pdf", device="pdf", width=4)

prePostTimeAveraged(melted, title = "HAI responses", xLabel = NULL, yLabel = "H1N1pdm09 HAI titer") + scale_y_continuous(trans = 'log2', breaks=c(2^(2:14)))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAIresponsesTimeAveraged.pdf", device = "pdf", width=7, height=7)
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                       Sum Sq  Df F value   Pr(>F)   
## Cohort                240742   1  2.4645 0.118563   
## TimeCategory         1085493   2  5.5562 0.004707 **
## Cohort:TimeCategory   213572   2  1.0932 0.337816   
## Residuals           14554664 149                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ aPD1            0    -87.4   -187.       11.9 0.084  
##  2 Time~ basel~ oneWe~          0    101.     -44.6     247.  0.231  
##  3 Time~ basel~ late            0    200.      57.8     343.  0.00315
##  4 Time~ oneWe~ late            0     99.2    -50.4     249.  0.262  
##  5 Coho~ Healt~ aPD1:~          0   -160.    -400.       79.2 0.387  
##  6 Coho~ Healt~ Healt~          0     62.5   -183.      308.  0.977  
##  7 Coho~ Healt~ aPD1:~          0    -23.9   -290.      242.  1      
##  8 Coho~ Healt~ Healt~          0    113.    -133.      358.  0.772  
##  9 Coho~ Healt~ aPD1:~          0    131.    -123.      384.  0.672  
## 10 Coho~ aPD1:~ Healt~          0    223.     -16.7     462.  0.0842 
## 11 Coho~ aPD1:~ aPD1:~          0    136.    -124.      397.  0.658  
## 12 Coho~ aPD1:~ Healt~          0    273.      33.4     512.  0.0156 
## 13 Coho~ aPD1:~ aPD1:~          0    291.      43.5     538.  0.0111 
## 14 Coho~ Healt~ aPD1:~          0    -86.4   -353.      180.  0.936  
## 15 Coho~ Healt~ Healt~          0     50.1   -196.      296.  0.992  
## 16 Coho~ Healt~ aPD1:~          0     68.    -185.      321.  0.971  
## 17 Coho~ aPD1:~ Healt~          0    136.    -130.      403.  0.677  
## 18 Coho~ aPD1:~ aPD1:~          0    154.    -119.      428.  0.579  
## 19 Coho~ Healt~ aPD1:~          0     17.9   -235.      271.  1      
## # ... with 1 more variable: p.adj.signif <chr>
subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(data = subsetData, xData = "TimeCategory", yData = "H1N1pdm09.HAI.titer", fillParam = "Cohort", title = "HAI titers over time", xLabel = "TimeCategory",
            yLabel = "HAI titer", groupby = "Subject") + scale_y_continuous(trans='log2', breaks=2^(2:13))
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort_persubject.pdf", device="pdf", width=5.5)
# 
# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
# twoSampleBar(data=subsetData, xData="Cohort", yData="H1N1pdm09.HAI.titer", fillParam="Cohort", batch = "Year",title="All yrs: HAI titers at d0", yLabel="H1N1pdm09 titer") + 
#   scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) ) +  coord_cartesian(ylim = c(4,8192))
# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
# twoSampleBar(data=subsetData, xData="Cohort", yData="H1N1pdm09.HAI.titer", fillParam="Cohort", batch="Year", title="Late HAI titers", yLabel="H1N1pdm09 titer") + 
#   scale_y_continuous(trans='log2' , breaks=c(2^(2:15))) + coord_cartesian(ylim = c(4,16000))
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort.pdf", device="pdf", width=4.5)

subsetData <- subset(mergedData, Cohort != "nonPD1" )
seroconv <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("H1N1pdm09.HAI.titer")); 
seroconv$FC <- seroconv$`late`/seroconv$`baseline`
twoSampleBar(data=seroconv, xData="Cohort", yData="FC", fillParam="Cohort", title="Seroconversion", yLabel="Seroconversion factor") + 
  scale_y_continuous(trans='log2',breaks=c(2^(0:14)), limits = c(1,175))
## Warning: Removed 12 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/SeroconversionFactor.pdf", device = "pdf", width=4.5)


oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$CD27hiCD38hi_..FreqParent <-  oneWeek$CD27hiCD38hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent /100
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" & cTfh_ICOShiCD38hi_..FreqParent > 1)    #  *****   OUTLIER REMOVED
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="FChai_late", 
           fillParam = "Cohort", title = "HAI vs PB response at oneWeek", xLabel= "Plasmablast d7 frequency (freq CD19)", yLabel = "Fold-change HAI (late)") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

univScatter(data = subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), yData = "FChai_late", xData = "CD19hi_CD23hi...FreqParent", fillParam = "dummy", 
            title = "HAI vs CD19+CD23+", yLabel= "HAI titer fold-change", xLabel = "CD23+ (% CD19+)")  
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

univScatter(data = subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), yData = "FChai_late", xData = "CD19hi_CD23hi...FreqParent", fillParam = "dummy", 
            title = "HAI vs CD19+CD23+", yLabel= "HAI titer fold-change", xLabel = "CD23+ (% CD19+)")  
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 29 rows containing non-finite values (stat_smooth).
## Warning: Removed 29 rows containing missing values (geom_point).

subsetData1 <- subset(mergedData, Cohort == "Healthy" & TimeCategory == "oneWeek"); subsetData2 <- subset(mergedData, Cohort == "aPD1" & TimeCategory == "oneWeek")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD19hi_CD23hi...FreqParent", yData="FChai_late", 
           fillParam = "Cohort", title = "HAI vs CD23 at oneWeek", xLabel= "CD23+ (% CD19)", yLabel = "Fold-change HAI (late)") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).

subsetData1 <- subset(mergedData, Cohort == "Healthy" & TimeCategory == "baseline"); subsetData2 <- subset(mergedData, Cohort == "aPD1" & TimeCategory == "baseline")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD19hi_CD23hi...FreqParent", yData="FChai_late", 
           fillParam = "Cohort", title = "HAI vs CD23 at baseline", xLabel= "CD23+ (% CD19)", yLabel = "Fold-change HAI (late)") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).

———– CXCL13 ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort','Year'), measure.vars = c("Plasma.CXCL13..pg.mL."))
prePostTimeAveraged(melted, title = "CXCL13", xLabel = NULL, yLabel = "Plasma CXCL13 (pg/mL)") + scale_y_continuous(breaks=seq(0,200,5), limits=c(50,110))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_TimeAveraged.pdf", device="pdf", width=4)
summary( fit <- aov(value ~ Cohort+TimeCategory + Cohort:TimeCategory, data=melted ) );  tukey_hsd(fit)
##                      Df Sum Sq Mean Sq F value  Pr(>F)   
## Cohort                1   1988    1988   3.289 0.07178 . 
## TimeCategory          2   7206    3603   5.961 0.00324 **
## Cohort:TimeCategory   2   4476    2238   3.702 0.02698 * 
## Residuals           148  89458     604                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ aPD1            0   7.20     -0.645     15.0  0.0718 
##  2 Time~ basel~ oneWe~          0  14.6       3.18      26.1  0.00824
##  3 Time~ basel~ late            0  -0.376   -11.7       10.9  0.997  
##  4 Time~ oneWe~ late            0 -15.0     -26.8       -3.20 0.00865
##  5 Coho~ Healt~ aPD1:~          0   3.14    -15.7       22.0  0.997  
##  6 Coho~ Healt~ Healt~          0   5.22    -14.1       24.5  0.97   
##  7 Coho~ Healt~ aPD1:~          0  29.5       8.60      50.5  0.00105
##  8 Coho~ Healt~ Healt~          0   1.41    -17.9       20.7  1      
##  9 Coho~ Healt~ aPD1:~          0   0.0782  -20.1       20.2  1      
## 10 Coho~ aPD1:~ Healt~          0   2.09    -16.7       20.9  1      
## 11 Coho~ aPD1:~ aPD1:~          0  26.4       5.91      46.9  0.00377
## 12 Coho~ aPD1:~ Healt~          0  -1.73    -20.6       17.1  1      
## 13 Coho~ aPD1:~ aPD1:~          0  -3.06    -22.7       16.6  0.998  
## 14 Coho~ Healt~ aPD1:~          0  24.3       3.37      45.3  0.0128 
## 15 Coho~ Healt~ Healt~          0  -3.81    -23.1       15.5  0.993  
## 16 Coho~ Healt~ aPD1:~          0  -5.15    -25.3       15.0  0.977  
## 17 Coho~ aPD1:~ Healt~          0 -28.1     -49.1       -7.19 0.00215
## 18 Coho~ aPD1:~ aPD1:~          0 -29.5     -51.2       -7.76 0.00185
## 19 Coho~ Healt~ aPD1:~          0  -1.33    -21.5       18.8  1      
## # ... with 1 more variable: p.adj.signif <chr>
subsetData <- subset(melted, TimeCategory == "oneWeek")
rownames(subsetData) <- seq(1:nrow(subsetData))
twoSampleBar(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13\nOne week", yLabel="plasma CXCL13 (pg/mL)") + 
  scale_y_continuous(limits = c(0,250))
## Warning: Removed 1 rows containing missing values (geom_point).

aggregate( subsetData, by= list(subsetData$variable, subsetData$TimeCategory, subsetData$Cohort), FUN=mean, na.rm = T)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
##                 Group.1 Group.2 Group.3 Subject TimeCategory Cohort     Year
## 1 Plasma.CXCL13..pg.mL. oneWeek Healthy      NA           NA     NA 2.555556
## 2 Plasma.CXCL13..pg.mL. oneWeek    aPD1      NA           NA     NA 2.666667
##   variable    value
## 1       NA 61.04413
## 2       NA 85.35924
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_oW.pdf", device="pdf",  width = 4)


FC_response <- dcast( subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" ), `Subject`+`Cohort`~`TimeCategory`, value.var = c("Plasma.CXCL13..pg.mL.")) 
FC_response$FCcxcl13 <- FC_response$oneWeek / FC_response$baseline
t.test(FC_response[which(FC_response$Cohort == "aPD1"), "baseline"], FC_response[which(FC_response$Cohort == "aPD1"), "oneWeek"], paired = T)
## 
##  Paired t-test
## 
## data:  FC_response[which(FC_response$Cohort == "aPD1"), "baseline"] and FC_response[which(FC_response$Cohort == "aPD1"), "oneWeek"]
## t = -5.5658, df = 19, p-value = 2.283e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -32.51086 -14.74144
## sample estimates:
## mean of the differences 
##               -23.62615
t.test(FC_response[which(FC_response$Cohort == "Healthy"), "baseline"], FC_response[which(FC_response$Cohort == "Healthy"), "oneWeek"], paired = T)
## 
##  Paired t-test
## 
## data:  FC_response[which(FC_response$Cohort == "Healthy"), "baseline"] and FC_response[which(FC_response$Cohort == "Healthy"), "oneWeek"]
## t = -1.4723, df = 26, p-value = 0.1529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12.518866   2.069751
## sample estimates:
## mean of the differences 
##               -5.224558
twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), xData="Cohort", yData="FCCXCL13_oW", fillParam="Cohort",FCplot =T, 
             title="CXCL13", yLabel="Fold-change at one week")+ scale_y_continuous(limits=c(0,3), breaks = seq(0,50,0.5))
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_foldChange_oW.pdf", width = 4)


subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "Plasma.CXCL13..pg.mL.", fillParam = "Cohort", title = "CXCL13", xLabel = "TimeCategory",
            yLabel = "Plasma CXCL13 (pg/mL)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,20))     # limits = c(0,45)
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_prePostTime.pdf", device="pdf")


univScatter(data = subset(mergedData, TimeCategory == "baseline" & Cohort == "aPD1"), yData = "Plasma.CXCL13..pg.mL.",  fillParam = "dummy",
            xData = "IgDlo_CD71hi_CD20loCD71hi.CD23hi...FreqParent", yLabel = "CXCL13 (pg/mL)", xLabel = "CD23hi (% ASC)", title = "CXCL13 vs CD23+ ASC at bL")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).

univScatter(data = subset(mergedData, TimeCategory == "late" & Cohort == "aPD1"), yData = "Plasma.CXCL13..pg.mL.",  fillParam = "dummy",
            xData = "IgDlo_CD71hi_CD20loCD71hi.CD23hi...FreqParent", yLabel = "CXCL13 (pg/mL)", xLabel = "CD23hi (% ASC)", title = "CXCL13 vs CD23+ ASC at bL")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

univScatter(data = subset(mergedData, TimeCategory == "baseline" & Cohort == "aPD1"), xData = "Plasma.CXCL13..pg.mL.",  fillParam = "dummy", position = 'right',
            yData = "FCCXCL13_oW", xLabel = "baseline CXCL13 (pg/mL)", yLabel = "Fold-change CXCL13", title = "CXCL13 in aPD1") + scale_fill_manual(values=c("#FFB18C"))+
  scale_y_continuous(breaks = seq(0,5,1),limits=c(0,3))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_bL-vs-FC_aPD1.pdf")


univScatter(data = subset(mergedData, TimeCategory == "late" & Cohort == "aPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent!=0), xData = "Plasma.CXCL13..pg.mL.",  fillParam = "dummy",
            yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xLabel = "baseline CXCL13 (pg/mL)", yLabel = "ASC (% CD71+)", title = "ASC vs CXCL13 at bL")
## `geom_smooth()` using formula 'y ~ x'

univScatter(data = mergedData, yData = "FCCXCL13_oW", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "AllYrs: Cycle of IT vs CXCL13 FC", yLabel= "CXCL13 FoldChange", xLabel = "Cycle of immunotherapy")  + scale_y_continuous(limits = c(0,4))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 155 rows containing non-finite values (stat_smooth).
## Warning: Removed 155 rows containing missing values (geom_point).

univScatter(data = mergedData, yData = "FCCXCL13_oW", xData = "CD27hiCD38hi_..FreqParent", fillParam = "dummy", 
            title = "AllYrs: PB response vs CXCL13 FC", yLabel= "CXCL13 FoldChange", xLabel = "CD27+CD38+ frequency") # + coord_cartesian(ylim = c(0,2))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 92 rows containing non-finite values (stat_smooth).
## Warning: Removed 92 rows containing missing values (geom_point).

univScatter(data = mergedData, yData = "FCPB_oW", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "AllYrs: Cycle of IT vs PB FC", yLabel= "PB FoldChange", xLabel = "Cycle of immunotherapy")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 157 rows containing non-finite values (stat_smooth).
## Warning: Removed 157 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
crossTimeCategory <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c('Plasma.CXCL13..pg.mL.' , 'H1N1pdm09.HAI.titer') )
crossTimeCategory2 <- dcast(crossTimeCategory, Subject + Cohort ~ TimeCategory + variable)
univScatter(crossTimeCategory2, xData = "baseline_Plasma.CXCL13..pg.mL.", yData="late_H1N1pdm09.HAI.titer", fillParam = NULL, 
            title = "All yrs: Late HAI vs baseline CXCL13", xLabel= "baseline Plasma CXCL13 (pg/mL)", yLabel = "Late HAI titer - H1N1pdm09")   # nonstd appearance due to lack of fillParam
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).

univScatter(crossTimeCategory2, xData = "oneWeek_Plasma.CXCL13..pg.mL.", yData="late_H1N1pdm09.HAI.titer", fillParam = NULL, 
            title = "All yrs: Late HAI vs d7 CXCL13", xLabel= "oneWeek Plasma CXCL13 (pg/mL)", yLabel = "Late HAI titer - H1N1pdm09")   # nonstd appearance due to lack of fillParam
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).

summary(lm( late_H1N1pdm09.HAI.titer ~ oneWeek_Plasma.CXCL13..pg.mL. + baseline_H1N1pdm09.HAI.titer, data=crossTimeCategory2))
## 
## Call:
## lm(formula = late_H1N1pdm09.HAI.titer ~ oneWeek_Plasma.CXCL13..pg.mL. + 
##     baseline_H1N1pdm09.HAI.titer, data = crossTimeCategory2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -535.32 -196.72  -51.43  135.27  664.92 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   118.9968   127.4315   0.934    0.356    
## oneWeek_Plasma.CXCL13..pg.mL.   2.0449     1.5736   1.300    0.201    
## baseline_H1N1pdm09.HAI.titer    0.7548     0.1529   4.937 1.37e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 296.3 on 41 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.3763, Adjusted R-squared:  0.3459 
## F-statistic: 12.37 on 2 and 41 DF,  p-value: 6.257e-05
summary(lm( Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, data=mergedData))
## 
## Call:
## lm(formula = Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.786 -14.205  -2.952   5.447 146.105 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          42.0682     8.8791   4.738 4.69e-06 ***
## CohortaPD1            8.1888     3.9491   2.074  0.03970 *  
## CohortnonPD1          6.7758     7.2866   0.930  0.35381    
## Year                  4.4333     3.2063   1.383  0.16866    
## TimeCategoryoneWeek  14.2214     4.6535   3.056  0.00262 ** 
## TimeCategorylate     -0.3203     4.5067  -0.071  0.94343    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.38 on 162 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.1008, Adjusted R-squared:  0.0731 
## F-statistic: 3.634 on 5 and 162 DF,  p-value: 0.003835
summary(lm( FCCXCL13_oW ~ Cohort + FCPB_oW + FChai_late, data=mergedData))
## 
## Call:
## lm(formula = FCCXCL13_oW ~ Cohort + FCPB_oW + FChai_late, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8013 -0.2238 -0.1329  0.1885  1.1130 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.88302    0.12836   6.879 1.41e-09 ***
## CohortaPD1   0.34844    0.10778   3.233  0.00181 ** 
## FCPB_oW     -0.02106    0.02158  -0.976  0.33234    
## FChai_late   0.26501    0.10193   2.600  0.01118 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4254 on 77 degrees of freedom
##   (92 observations deleted due to missingness)
## Multiple R-squared:  0.2844, Adjusted R-squared:  0.2565 
## F-statistic:  10.2 on 3 and 77 DF,  p-value: 9.887e-06
bivScatter(data1 = subset(subsetData, Cohort == "Healthy" & TimeCategory == "baseline"), data2 = subset(subsetData, Cohort == "aPD1" & TimeCategory == "baseline"), 
           name1 = "HC", name2 = "aPD1", xData = "cTfh_ICOSloCD38lo_Foxp3hi...FreqParent", yData="FCCXCL13_oW", 
           fillParam = "Cohort", title = "CXCL13 FC vs Tfr at bL", xLabel= "ICOSloCD38lo Foxp3hi at bL", yLabel = "Fold-change CXCL13") + 
  scale_y_continuous(breaks=seq(0,99,1), limits = c(0,4))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

bivScatter(data1 = subset(subsetData, Cohort == "Healthy" & TimeCategory == "baseline"), data2 = subset(subsetData, Cohort == "aPD1" & TimeCategory == "baseline"), 
           name1 = "HC", name2 = "aPD1", xData = "FChai_late", yData="FCCXCL13_oW", 
           fillParam = "Cohort", title = "CXCL13 FC vs HAI FC", xLabel= "FChai_late", yLabel = "Fold-change CXCL13") + 
  scale_y_continuous(breaks=seq(0,99,1), limits = c(0,4))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).

———– glycosylation data ——————–

# ***************************    Total afucosylation  *******************************
subsetData <- subset(mergedData, Cohort != "nonPD1" )

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.afucosylated", fillParam = "Cohort", title = "IgG1 afucosylation", xLabel = "TimeCategory",
            yLabel = "anti-H1 Afucosylation (% abund)", groupby = "Subject")  + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1afucosylation_overTime.pdf", width=6)

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.afucosylated", fillParam = "Cohort", title = "IgG2 afucosylation over time", xLabel = "TimeCategory",
            yLabel = "Afucosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.afucosylated", fillParam = "Cohort", title = "IgG3/4 afucosylation over time", xLabel = "TimeCategory",
            yLabel = "Afucosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ***************************    Total Bisection   *******************************
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.Bisection", fillParam = "Cohort", title = "IgG1 bisection", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG1", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1bisection_overTime.pdf", width=6)

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.Bisection", fillParam = "Cohort", title = "IgG2 bisection over time", xLabel = "TimeCategory",
            yLabel = "Bisection (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.Bisection", fillParam = "Cohort", title = "IgG3/4 bisection over time", xLabel = "TimeCategory",
            yLabel = "Bisection (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.Bisection", fillParam="Cohort", 
             title="Baseline", yLabel="% anti-H1 IgG1")+ scale_y_continuous(limits = c(0,20), breaks = seq(0,50,5))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1bisection_bL.pdf", width = 4)
twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), xData="Cohort", yData="IgG1_Total.Bisection", fillParam="Cohort", 
             title="One Week", yLabel="% anti-H1 IgG1")+ scale_y_continuous(limits = c(0,20), breaks = seq(0,50,5))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1bisection_oW.pdf", width=4)


univScatter(data = subset(mergedData, TimeCategory == "baseline"), yData = "IgG1_Total.Bisection", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "IgG1 bisected at baseline", yLabel= "IgG1 bisected", xLabel = "Cycle of immunotherapy" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (stat_smooth).
## Warning: Removed 36 rows containing missing values (geom_point).

# ***************************    Galactosylation *******************************

melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort','Year'), measure.vars = c("IgG1_Total.Galactosylation..G1.G2."))
prePostTimeAveraged(melted, title = "Total galactosylation", xLabel = NULL, yLabel = "% anti-H1 IgG1") + 
  scale_y_continuous(breaks=seq(0,99,1), limits = c(90,100))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_totGalactosylation_overTime.pdf")
# melted %>%  anova_test(value ~ TimeCategory+Cohort )                        # two-way anova without interaction term 
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                      Sum Sq  Df  F value Pr(>F)    
## Cohort              204.313   1 138.1247 <2e-16 ***
## TimeCategory          5.779   2   1.9533 0.1454    
## Cohort:TimeCategory   3.070   2   1.0377 0.3568    
## Residuals           221.879 150                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high    p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
##  1 Coho~ Healt~ aPD1            0 -2.32e+0   -2.70     -1.93  0.      
##  2 Time~ basel~ oneWe~          0  3.12e-1   -0.252     0.876 3.91e- 1
##  3 Time~ basel~ late            0  4.49e-1   -0.106     1.00  1.38e- 1
##  4 Time~ oneWe~ late            0  1.36e-1   -0.443     0.715 8.43e- 1
##  5 Coho~ Healt~ aPD1:~          0 -2.64e+0   -3.57     -1.70  1.82e-12
##  6 Coho~ Healt~ Healt~          0  1.14e-1   -0.841     1.07  9.99e- 1
##  7 Coho~ Healt~ aPD1:~          0 -2.13e+0   -3.15     -1.11  1.85e- 7
##  8 Coho~ Healt~ Healt~          0  1.15e-1   -0.841     1.07  9.99e- 1
##  9 Coho~ Healt~ aPD1:~          0 -1.85e+0   -2.83     -0.864 3.47e- 6
## 10 Coho~ aPD1:~ Healt~          0  2.75e+0    1.82      3.68  2.70e-13
## 11 Coho~ aPD1:~ aPD1:~          0  5.03e-1   -0.496     1.50  6.94e- 1
## 12 Coho~ aPD1:~ Healt~          0  2.75e+0    1.82      3.68  2.68e-13
## 13 Coho~ aPD1:~ aPD1:~          0  7.87e-1   -0.175     1.75  1.76e- 1
## 14 Coho~ Healt~ aPD1:~          0 -2.25e+0   -3.27     -1.23  3.61e- 8
## 15 Coho~ Healt~ Healt~          0  5.89e-4   -0.955     0.956 1.00e+ 0
## 16 Coho~ Healt~ aPD1:~          0 -1.96e+0   -2.95     -0.978 7.05e- 7
## 17 Coho~ aPD1:~ Healt~          0  2.25e+0    1.23      3.27  3.58e- 8
## 18 Coho~ aPD1:~ aPD1:~          0  2.84e-1   -0.765     1.33  9.70e- 1
## 19 Coho~ Healt~ aPD1:~          0 -1.96e+0   -2.95     -0.979 6.99e- 7
## # ... with 1 more variable: p.adj.signif <chr>
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.Galactosylation..G1.G2.", fillParam = "Cohort", title = "IgG1 total galactosylation", 
            xLabel = "TimeCategory", yLabel = "% anti-H1 IgG1", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) ) + 
  coord_cartesian(ylim = c(90,100))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_Tot-galactosylation_overTime_perSubject.pdf", width=6.5)

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.Galactosylation..G1.G2.", fillParam = "Cohort", title = "IgG2 total galactosylation", 
            xLabel = "TimeCategory", yLabel = "% anti-H1 IgG2", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )+ 
  coord_cartesian(ylim = c(80,100))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG2_Tot-galactosylation_overTime_perSubject.pdf", width=6.5)

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.Galactosylation..G1.G2.", fillParam = "Cohort", title = "IgG3/4 total galactosylation", 
            xLabel = "TimeCategory", yLabel = "% anti-H1 IgG3/4", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )+ 
  coord_cartesian(ylim = c(50,100))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG34_Tot-galactosylation_overTime_perSubject.pdf", width=6.5)


twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.Galactosylation..G1.G2.", fillParam="Cohort", 
             title="Total galactosylation\nbaseline", yLabel="% anti-H1 IgG1") + 
  scale_y_continuous(breaks = seq(0,100,2)) +   coord_cartesian(ylim = c(88,100)) 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_totGalactosylation_bL.pdf", width=4.5)

univScatter(data = subset(mergedData, TimeCategory == "baseline"), yData = "IgG1_Total.Galactosylation..G1.G2.", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Total galact at baseline", yLabel= "IgG1 total galac", xLabel = "Cycle of immunotherapy" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (stat_smooth).

## Warning: Removed 36 rows containing missing values (geom_point).

univScatter(data = subset(mergedData, TimeCategory == "baseline" & Cohort == "aPD1"), xData = "IgG1_Total.Galactosylation..G1.G2.", yData = "Plasma.CXCL13..pg.mL.", fillParam = "dummy", 
            title = "Total galact vs CXCL13 at baseline", xLabel= "IgG1 total galac", yLabel = "CXCL13"  )
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_totGalactosylation_bL_vs_CXCL13.pdf", width=8)



prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.G0", fillParam = "Cohort", title = "IgG1 G0 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG1", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.G0", fillParam = "Cohort", title = "IgG2 G0 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG2", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.G0", fillParam = "Cohort", title = "IgG3/4 G0 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG3/4", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## Warning: Removed 5 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.G0", fillParam="Cohort", 
             title="G0 - baseline", yLabel="% anti-H1 IgG1") + scale_y_continuous(breaks = seq(0,100,2)) + 
  coord_cartesian(ylim = c(0,10))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_G0_Galactosylation_bL.pdf", width=4.5)


univScatter(data = subset(mergedData, TimeCategory == "baseline"), yData = "IgG1_Total.G0", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "IgG1 G0 galact at baseline", yLabel= "IgG1 G0 galac", xLabel = "Cycle of immunotherapy" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (stat_smooth).
## Warning: Removed 36 rows containing missing values (geom_point).

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.G1", fillParam = "Cohort", title = "IgG1 G1 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G1 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.G1", fillParam = "Cohort", title = "IgG2 G1 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G1 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.G1", fillParam = "Cohort", title = "IgG3/4 G1 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G1 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.G1", fillParam="Cohort", 
             title="G1 - baseline", yLabel="% anti-H1 IgG1") + scale_y_continuous(limits = c(0,70), breaks = seq(0,100,10)) 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_G1_Galactosylation_bL.pdf", width=4.5)

univScatter(data = subset(mergedData, TimeCategory == "baseline"), yData = "IgG1_Total.G1", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "IgG1 G1 galact at baseline", yLabel= "IgG1 G1 galac", xLabel = "Cycle of immunotherapy" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (stat_smooth).

## Warning: Removed 36 rows containing missing values (geom_point).

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.G2", fillParam = "Cohort", title = "IgG1 G2 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G2 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,80) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.G2", fillParam = "Cohort", title = "IgG2 G2 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G2 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,80) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.G2", fillParam = "Cohort", title = "IgG3/4 G2 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G2 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,80) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.G2", fillParam="Cohort", 
             title="G2 - baseline", yLabel="% anti-H1 IgG1") + scale_y_continuous(limits = c(0,80), breaks = seq(0,100,10)) 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_G2_Galactosylation_bL.pdf", width=4.5)




# ***************************    Sialylation *******************************

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort','Year'), measure.vars = c("IgG1_Total.sialylated"))
prePostTimeAveraged(melted, title = "Sialylation", xLabel = NULL, yLabel = "% anti-H1 IgG1") + scale_y_continuous(breaks=seq(0,99,2), limits = c(10,22))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_sialylation_overTime.pdf")

melted %>%  anova_test(value ~ TimeCategory+Cohort )                        # two-way anova without interaction term 
## Coefficient covariances computed by hccm()
## ANOVA Table (type II tests)
## 
##         Effect DFn DFd      F        p p<.05   ges
## 1 TimeCategory   2 152  0.349 7.06e-01       0.005
## 2       Cohort   1 152 21.531 7.46e-06     * 0.124
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                     Sum Sq  Df F value    Pr(>F)    
## Cohort               800.1   1 21.4416 7.846e-06 ***
## TimeCategory          25.9   2  0.3473    0.7072    
## Cohort:TimeCategory   51.1   2  0.6853    0.5055    
## Residuals           5597.4 150                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ aPD1            0   -4.57    -6.51     -2.64  6.59e-6
##  2 Time~ basel~ oneWe~          0    0.281   -2.55      3.11  9.70e-1
##  3 Time~ basel~ late            0    0.962   -1.83      3.75  6.93e-1
##  4 Time~ oneWe~ late            0    0.681   -2.23      3.59  8.44e-1
##  5 Coho~ Healt~ aPD1:~          0   -3.74    -8.42      0.941 1.98e-1
##  6 Coho~ Healt~ Healt~          0    1.47    -3.33      6.27  9.50e-1
##  7 Coho~ Healt~ aPD1:~          0   -4.81    -9.94      0.319 7.97e-2
##  8 Coho~ Healt~ Healt~          0    1.05    -3.74      5.85  9.88e-1
##  9 Coho~ Healt~ aPD1:~          0   -2.78    -7.73      2.17  5.84e-1
## 10 Coho~ aPD1:~ Healt~          0    5.20     0.526     9.88  1.97e-2
## 11 Coho~ aPD1:~ aPD1:~          0   -1.07    -6.09      3.94  9.90e-1
## 12 Coho~ aPD1:~ Healt~          0    4.79     0.114     9.47  4.12e-2
## 13 Coho~ aPD1:~ aPD1:~          0    0.956   -3.87      5.79  9.93e-1
## 14 Coho~ Healt~ aPD1:~          0   -6.28   -11.4      -1.15  7.11e-3
## 15 Coho~ Healt~ Healt~          0   -0.412   -5.21      4.39  1.00e+0
## 16 Coho~ Healt~ aPD1:~          0   -4.25    -9.20      0.699 1.37e-1
## 17 Coho~ aPD1:~ Healt~          0    5.87     0.736    11.0   1.50e-2
## 18 Coho~ aPD1:~ aPD1:~          0    2.03    -3.24      7.30  8.76e-1
## 19 Coho~ Healt~ aPD1:~          0   -3.84    -8.78      1.11  2.26e-1
## # ... with 1 more variable: p.adj.signif <chr>
twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.sialylated", fillParam="Cohort", 
             title="Sialylation\nbaseline", yLabel="% anti-H1 IgG1") + scale_y_continuous(limits = c(0,40), breaks = seq(0,50,5))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1sialylation_bL.pdf", width=4)
twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), xData="Cohort", yData="IgG1_Total.sialylated", fillParam="Cohort", 
             title="One week", yLabel="Sialylation (% anti-H1 IgG1)")+ scale_y_continuous(limits = c(0,40), breaks = seq(0,50,5))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1sialylation_oW.pdf", width=4)

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.sialylated", fillParam = "Cohort", title = "IgG1 sialylation", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG1", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_sialylation_overTime_perSubject.pdf", width=6)
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.sialylated", fillParam = "Cohort", title = "IgG2 sialylation", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG2", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG2_sialylation_overTime_perSubject.pdf", width=6)
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.sialylated", fillParam = "Cohort", title = "IgG3/4 sialylation", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG3/4", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG34_sialylation_overTime_perSubject.pdf", width=6)


summary(lm( FChai_late ~ Cohort + Year + IgG1sial_oW, data=subsetData))
## 
## Call:
## lm(formula = FChai_late ~ Cohort + Year + IgG1sial_oW, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82890 -0.21526 -0.04529  0.18127  1.38534 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.78591    0.30882   2.545   0.0121 *  
## CohortaPD1   0.48945    0.07300   6.704 5.83e-10 ***
## Year        -0.05049    0.06115  -0.826   0.4105    
## IgG1sial_oW  0.42385    0.24118   1.757   0.0812 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4039 on 128 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.2638, Adjusted R-squared:  0.2466 
## F-statistic: 15.29 on 3 and 128 DF,  p-value: 1.459e-08
summary(lm( IgG1sial_oW ~ Cohort + FCtfh_oW, data=subsetData))
## 
## Call:
## lm(formula = IgG1sial_oW ~ Cohort + FCtfh_oW, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29708 -0.06892 -0.01885  0.05596  0.75654 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.10985    0.02522  44.008   <2e-16 ***
## CohortaPD1  -0.01999    0.02963  -0.675    0.501    
## FCtfh_oW    -0.00640    0.01472  -0.435    0.664    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1595 on 134 degrees of freedom
##   (19 observations deleted due to missingness)
## Multiple R-squared:  0.00721,    Adjusted R-squared:  -0.007608 
## F-statistic: 0.4865 on 2 and 134 DF,  p-value: 0.6158
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" & cTfh_ICOShiCD38hi_..FreqParent > 1)    #  *****   OUTLIER REMOVED
bivScatter(data1 = subset(subsetData, Cohort == "Healthy"), data2 = subset(subsetData, Cohort == "aPD1"), 
           name1 = "HC", name2 = "aPD1", xData = "IgG1sial_oW", yData="FChai_late", 
           fillParam = "Cohort", title = "HAI vs IgG1 sial FC", xLabel= "IgG1 sialylation fold-change", yLabel = "Fold-change HAI (late)") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).

bivScatter(data1 = subset(subsetData, Cohort == "Healthy"), data2 = subset(subsetData, Cohort == "aPD1"), 
           name1 = "HC", name2 = "aPD1", xData = "IgG1sial_oW", yData="FCPB_oW", 
           fillParam = "Cohort", title = "PB FC vs IgG1 sial FC", xLabel= "IgG1 sialylation fold-change", yLabel = "Fold-change Plasmablasts") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 39 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Warning: Removed 39 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).

bivScatter(data1 = subset(subsetData, Cohort == "Healthy"), data2 = subset(subsetData, Cohort == "aPD1"), 
           name1 = "HC", name2 = "aPD1", xData = "IgG1_Total.sialylated", yData="FCtfh_oW", 
           fillParam = "Cohort", title = "Tfh FC vs IgG1 sial", xLabel= "IgG1 sialylation total", yLabel = "Fold-change Tfh") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

univScatter(data = subset(mergedData, TimeCategory == "oneWeek" & Cohort == "aPD1"), yData = "IgG1_Total.sialylated", xData = "FCtfh_oW", fillParam = "dummy", 
            title = "FCtfh_oW vs IgG1 sialylated", yLabel= "IgG1 sialylated", xLabel = "FCtfh oW" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# prePostTimeGene(singleGeneData, xData = "TimeCategory" , yData = "value", fillParam = "Cohort", groupby = "Subject", title = "FUT8 in ABC", xLabel = "TimeCategory", 
#                 yLabel = "log2 counts", paired = F)

# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory != "late" )
# 
# t_test(data = subset(subsetData, Cohort == "Healthy"), IgG1_Total.sialylated~ TimeCategory, paired=T)
# t_test(data = subset(subsetData, Cohort == "aPD1"), IgG1_Total.sialylated ~ TimeCategory, paired=T)

univScatter(data = subset(mergedData, TimeCategory == "baseline"), yData = "IgG1_Total.sialylated", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs IgG1 sialylated", yLabel= "IgG1 sialylated", xLabel = "Cycle of immunotherapy" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (stat_smooth).
## Warning: Removed 36 rows containing missing values (geom_point).

bivScatter(data1 = subset(mergedData, TimeCategory == "baseline" & Cohort == "Healthy"), data2 = subset(mergedData, TimeCategory == "baseline" & Cohort == "aPD1"),
          name1 = "Healthy", name2 = "aPD1", xData = "IgG1_Total.Galactosylation..G1.G2.", yData = "IgG1_Total.sialylated", fillParam = "Cohort", 
          title = "Sialylation vs Galactosylation", xLabel = "Total galactosylation (% anti-H1 IgG)", yLabel = "Sialylation (% anti-H1 IgG") + 
  coord_cartesian(ylim = c(0,30), xlim = c(90,99))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_sial-vs-TotGalactosyl_correlation_twoCohorts.pdf", width=8)

 
# ***************************    Affinity   *******************************

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort','Year'), measure.vars = c("Lo.Hi.HA.affinity"))
prePostTimeAveraged(melted, title = "anti-HA Affinity", xLabel = NULL, yLabel = "Lo Hi HA affinity") + scale_y_continuous(breaks=seq(0,99,0.03))

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/antiHA-affinity_overTime_LoHi.pdf")
melted %>%  anova_test(value ~ TimeCategory+Cohort )                        # two-way anova without interaction term 
## Warning: NA detected in rows: 10.
## Removing this rows before the analysis.
## Coefficient covariances computed by hccm()
## ANOVA Table (type II tests)
## 
##         Effect DFn DFd      F        p p<.05   ges
## 1 TimeCategory   2 151 15.257 9.22e-07     * 0.168
## 2       Cohort   1 151 68.632 5.93e-14     * 0.312
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                      Sum Sq  Df F value    Pr(>F)    
## Cohort              0.47436   1 68.2280 7.281e-14 ***
## TimeCategory        0.21091   2 15.1677 1.010e-06 ***
## Cohort:TimeCategory 0.00772   2  0.5554     0.575    
## Residuals           1.03593 149                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high    p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
##  1 Coho~ Healt~ aPD1            0  -0.115  -0.141    -0.0883  0.      
##  2 Time~ basel~ oneWe~          0   0.0452  0.00627   0.0841  1.83e- 2
##  3 Time~ basel~ late            0   0.0883  0.0503    0.126   4.92e- 7
##  4 Time~ oneWe~ late            0   0.0432  0.00325   0.0831  3.06e- 2
##  5 Coho~ Healt~ aPD1:~          0  -0.130  -0.193    -0.0658  4.24e- 7
##  6 Coho~ Healt~ Healt~          0   0.0314 -0.0341    0.0970  7.36e- 1
##  7 Coho~ Healt~ aPD1:~          0  -0.0694 -0.140     0.00158 5.93e- 2
##  8 Coho~ Healt~ Healt~          0   0.0734  0.00791   0.139   1.84e- 2
##  9 Coho~ Healt~ aPD1:~          0  -0.0263 -0.0938    0.0412  8.71e- 1
## 10 Coho~ aPD1:~ Healt~          0   0.161   0.0972    0.225   2.64e-10
## 11 Coho~ aPD1:~ aPD1:~          0   0.0602 -0.00931   0.130   1.31e- 1
## 12 Coho~ aPD1:~ Healt~          0   0.203   0.139     0.267   2.35e-14
## 13 Coho~ aPD1:~ aPD1:~          0   0.103   0.0374    0.169   1.76e- 4
## 14 Coho~ Healt~ aPD1:~          0  -0.101  -0.172    -0.0299  9.38e- 4
## 15 Coho~ Healt~ Healt~          0   0.0420 -0.0235    0.108   4.37e- 1
## 16 Coho~ Healt~ aPD1:~          0  -0.0577 -0.125     0.00980 1.40e- 1
## 17 Coho~ aPD1:~ Healt~          0   0.143   0.0719    0.214   5.48e- 7
## 18 Coho~ aPD1:~ aPD1:~          0   0.0432 -0.0297    0.116   5.28e- 1
## 19 Coho~ Healt~ aPD1:~          0  -0.0997 -0.167    -0.0322  5.03e- 4
## # ... with 1 more variable: p.adj.signif <chr>
univScatter(data = subsetData, xData = "Lo.Hi.HA.affinity", yData = "H1N1pdm09.HAI.titer", fillParam = "dummy", 
            title = "HAI titer incr with affinity", xLabel= "Lo Hi HA affinity", yLabel = "HAI titer - H1N1pdm09") + scale_y_continuous(trans = "log", breaks = 2^(1:12)) +
  coord_cartesian(ylim = c(2,4096))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/NeutAbtiter_vs_antiHA-affinity_LoHi.pdf")

prePostTime(subsetData, xData = "TimeCategory", yData = "Lo.Hi.HA.affinity", fillParam = "Cohort", title = "anti-HA affinity over time", xLabel = "TimeCategory",
            yLabel = "Lo Hi HA affinity (anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1) )
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/antiHA-affinity_overTime_LoHi_perSubject.pdf", width=8)

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="Lo.Hi.HA.affinity", fillParam="Cohort", 
             title="anti-HA affinity at baseline", yLabel="Lo Hi HA affinity (anti-H1)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1.1) )

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), xData="Cohort", yData="Lo.Hi.HA.affinity", fillParam="Cohort", 
             title="anti-HA affinity at oneWeek", yLabel="Lo Hi HA affinity (anti-H1)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1.1) )
## Warning: Removed 1 rows containing missing values (geom_point).

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late"), xData="Cohort", yData="Lo.Hi.HA.affinity", fillParam="Cohort", 
             title="anti-HA affinity at late", yLabel="Lo Hi HA affinity (anti-H1)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1.1) )

FC_Affinity <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("Lo.Hi.HA.affinity"))
FC_Affinity$FC <- FC_Affinity$`late`/FC_Affinity$`baseline`
data=FC_Affinity; xData="Cohort"; yData="FC"; fillParam="Cohort"; title="anti-HA Affinity"; yLabel="fold-change (late / baseline)";
# overTime <- aggregate(x = data[,yData], by= list(Cohort = data$Cohort), FUN=mean, na.rm = T)
# names(overTime)[which(names(overTime) == 'x')] <- yData
justforttest <- data[, c(xData,yData)]
fit <- rstatix::t_test(justforttest, formula = as.formula(paste(colnames(justforttest)[2], "~", paste(colnames(justforttest)[1]), sep = "") ))
pValue <- fit$p
annotationInfo <- paste0("P = ", round(pValue, 2)); my_grob = grobTree(textGrob(annotationInfo, x=0.03,  y=0.93, hjust=0, gp=gpar(col="black", fontsize=28)))
ggplot(data=data, aes_string(x=xData, y=yData, fill=fillParam, width=0.6)) + scale_fill_manual(values = c("#7FAEDB", "#FFB18C")) + 
  geom_hline(yintercept=1, linetype="dashed", color = "black", size=0.5) + 
  geom_violin(draw_quantiles = 0.5) + ggbeeswarm::geom_quasirandom(size=7, pch=21, fill="black", color="white", alpha=0.35) + 
  ggtitle(title) + ylab(yLabel) +  theme_bw() +
  theme(axis.text = element_text(size=28,hjust = 0.5, color="black"), axis.title = element_text(size=28,hjust = 0.5), axis.title.x = element_blank(), 
        plot.title = element_text(size=32,hjust = 0.5), axis.text.x = element_text(angle=45, hjust=1,vjust=1)) + 
  annotation_custom(my_grob) + theme(legend.position = "none") + scale_y_continuous(breaks=seq(0,99,0.25), limits = c(0.75,1.75))
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing missing values (position_quasirandom).

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/antiHA-affinity_FC_LoHi.pdf", width=4.25)


subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("X1.Kd.HA")); melted$value <- 1/melted$value
prePostTimeAveraged(melted, title = "HA EC50/Kd", xLabel = NULL, yLabel = "1 / Concentration (ug/mL)") + scale_y_continuous(breaks=seq(0,99,0.25))

summary(fit <- lm( data = mergedData, X1.Kd.HA ~ Cohort  + TimeCategory))
## 
## Call:
## lm(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8864 -0.7640 -0.4373  0.2622  5.7613 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           0.6456     0.3647   1.770   0.0817 .
## CohortaPD1            0.5068     0.3984   1.272   0.2081  
## CohortnonPD1         -0.4517     0.8162  -0.553   0.5819  
## TimeCategoryoneWeek   1.1249     0.4932   2.281   0.0260 *
## TimeCategorylate      1.0483     0.4342   2.414   0.0187 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.54 on 62 degrees of freedom
##   (106 observations deleted due to missingness)
## Multiple R-squared:  0.1348, Adjusted R-squared:  0.07903 
## F-statistic: 2.416 on 4 and 62 DF,  p-value: 0.05812
summary(fit <- aov(formula = X1.Kd.HA ~ Cohort  + TimeCategory + Cohort:TimeCategory, data = mergedData)); tukey_hsd(fit) %>% print(n=42)
##                     Df Sum Sq Mean Sq F value Pr(>F)  
## Cohort               2   4.25   2.126   0.939 0.3969  
## TimeCategory         2  18.67   9.336   4.122 0.0212 *
## Cohort:TimeCategory  4  15.73   3.933   1.737 0.1543  
## Residuals           58 131.35   2.265                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 106 observations deleted due to missingness
## # A tibble: 42 x 9
##    term  group1 group2 null.value estimate conf.low conf.high  p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>  <dbl>
##  1 Coho~ Healt~ aPD1            0   0.357   -0.558      1.27  0.619 
##  2 Coho~ Healt~ nonPD1          0  -0.612   -2.52       1.30  0.724 
##  3 Coho~ aPD1   nonPD1          0  -0.968   -2.90       0.963 0.454 
##  4 Time~ basel~ oneWe~          0   1.09    -0.0485     2.22  0.0634
##  5 Time~ basel~ late            0   1.04     0.0219     2.06  0.0442
##  6 Time~ oneWe~ late            0  -0.0453  -1.22       1.13  0.995 
##  7 Coho~ Healt~ aPD1:~          0  -0.617   -2.52       1.29  0.98  
##  8 Coho~ Healt~ nonPD~          0  -0.595   -4.30       3.11  1     
##  9 Coho~ Healt~ Healt~          0   0.416   -1.61       2.44  0.999 
## 10 Coho~ Healt~ aPD1:~          0   1.55    -1.25       4.35  0.693 
## 11 Coho~ Healt~ nonPD~          0  -0.355   -5.40       4.69  1     
## 12 Coho~ Healt~ Healt~          0  -0.0118  -2.04       2.01  1     
## 13 Coho~ Healt~ aPD1:~          0   1.51    -0.517      3.53  0.304 
## 14 Coho~ Healt~ nonPD~          0  -0.377   -5.42       4.67  1     
## 15 Coho~ aPD1:~ nonPD~          0   0.0216  -3.64       3.69  1     
## 16 Coho~ aPD1:~ Healt~          0   1.03    -0.920      2.99  0.741 
## 17 Coho~ aPD1:~ aPD1:~          0   2.17    -0.583      4.91  0.236 
## 18 Coho~ aPD1:~ nonPD~          0   0.262   -4.76       5.28  1     
## 19 Coho~ aPD1:~ Healt~          0   0.605   -1.35       2.56  0.985 
## 20 Coho~ aPD1:~ aPD1:~          0   2.12     0.171      4.08  0.0233
## 21 Coho~ aPD1:~ nonPD~          0   0.240   -4.78       5.26  1     
## 22 Coho~ nonPD~ Healt~          0   1.01    -2.72       4.74  0.993 
## 23 Coho~ nonPD~ aPD1:~          0   2.14    -2.05       6.34  0.776 
## 24 Coho~ nonPD~ nonPD~          0   0.240   -5.70       6.18  1     
## 25 Coho~ nonPD~ Healt~          0   0.584   -3.14       4.31  1     
## 26 Coho~ nonPD~ aPD1:~          0   2.10    -1.62       5.83  0.671 
## 27 Coho~ nonPD~ nonPD~          0   0.219   -5.72       6.16  1     
## 28 Coho~ Healt~ aPD1:~          0   1.13    -1.70       3.96  0.931 
## 29 Coho~ Healt~ nonPD~          0  -0.771   -5.84       4.29  1     
## 30 Coho~ Healt~ Healt~          0  -0.428   -2.50       1.64  0.999 
## 31 Coho~ Healt~ aPD1:~          0   1.09    -0.976      3.16  0.744 
## 32 Coho~ Healt~ nonPD~          0  -0.793   -5.86       4.27  1     
## 33 Coho~ aPD1:~ nonPD~          0  -1.90    -7.32       3.52  0.967 
## 34 Coho~ aPD1:~ Healt~          0  -1.56    -4.39       1.27  0.697 
## 35 Coho~ aPD1:~ aPD1:~          0  -0.0412  -2.87       2.79  1     
## 36 Coho~ aPD1:~ nonPD~          0  -1.93    -7.35       3.50  0.965 
## 37 Coho~ nonPD~ Healt~          0   0.343   -4.72       5.41  1     
## 38 Coho~ nonPD~ aPD1:~          0   1.86    -3.20       6.93  0.957 
## 39 Coho~ nonPD~ nonPD~          0  -0.0218  -6.88       6.83  1     
## 40 Coho~ Healt~ aPD1:~          0   1.52    -0.548      3.59  0.321 
## 41 Coho~ Healt~ nonPD~          0  -0.365   -5.43       4.70  1     
## 42 Coho~ aPD1:~ nonPD~          0  -1.88    -6.95       3.18  0.954 
## # ... with 1 more variable: p.adj.signif <chr>
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_overTime.pdf", device="pdf", height=7, width=7)
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"); subsetData$X1.Kd.HA <- 1 / subsetData$X1.Kd.HA
twoSampleBar(data=subsetData, xData="Cohort", yData="X1.Kd.HA", fillParam="Cohort", title="HA EC50/Kd\nBaseline", yLabel="1 / Concentration (ug/mL)",position = 'left')
## Warning: Removed 31 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_baseLine.pdf", device="pdf", width=4, height=8)

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
univScatter(data = subsetData, xData = "Cycle.of.Immunotherapy", yData = "Lo.Hi.HA.affinity", fillParam = "TimeCategory", 
            title = "Affinity over time", xLabel= "Cycle of Immunotherapy", yLabel = "Lo Hi Affinity")  + scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Warning: Removed 30 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_cycleofImmunotherapy.pdf", width=8)

———– Working with the medianFI for key transcription factors ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="Ki67hiCD38hicTfh_medfi.Blimp1.", fillParam="Cohort", title="Blimp1 in +/+ at baseline", yLabel="Blimp1 median FI")
## Warning: Removed 26 rows containing missing values (geom_point).

twoSampleBar(data=subsetData, xData="Cohort", yData="Ki67hiCD38hicTfh_medfi.Bcl6..batchEffect", fillParam="Cohort", title="Bcl6 in +/+ at baseline", yLabel="Bcl6 median FI")
## Warning: Removed 26 rows containing missing values (geom_point).

subsetData$temp <- subsetData$Ki67hiCD38hicTfh_medfi.Bcl6..batchEffect / subsetData$Ki67hiCD38hicTfh_medfi.Blimp1.
twoSampleBar(data=subsetData, xData="Cohort", yData="temp", fillParam="Cohort", title="Bcl6/Blimp1 ratio in +/+ at baseline", yLabel="Bcl6/Blimp1 protein ratio")
## Warning: Removed 26 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="Ki67hiCD38hicTfh_medfi.Blimp1.", fillParam="Cohort", title="All yrs: Blimp1 in +/+ at d7", yLabel="Blimp1 median FI")
## Warning: Removed 18 rows containing missing values (geom_point).

twoSampleBar(data=subsetData, xData="Cohort", yData="Ki67hiCD38hicTfh_medfi.Bcl6..batchEffect", fillParam="Cohort", title="All yrs: Bcl6 in +/+ at d7", yLabel="Bcl6 median FI")
## Warning: Removed 18 rows containing missing values (geom_point).

subsetData$temp <- subsetData$Ki67hiCD38hicTfh_medfi.Bcl6..batchEffect / subsetData$Ki67hiCD38hicTfh_medfi.Blimp1.
twoSampleBar(data=subsetData, xData="Cohort", yData="temp", fillParam="Cohort", title="Bcl6/Blimp1 ratio in +/+ at d7", yLabel="Bcl6/Blimp1 protein ratio")
## Warning: Removed 18 rows containing missing values (geom_point).

———– subset protein correlations ——————–

# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
# a <- colnames(subsetData[ , sapply(subsetData, class) == 'character'])
# a <- cor(subsetData[ , - grep(paste(c(a, "Cohort"), collapse="|"), colnames(subsetData))], use="pairwise.complete.obs")
# a[order(a[,20],decreasing = F),20]


oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi.CD32hi...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD32hi at oneWeek", xLabel= "CD20lo - CD32hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi.CD32hi...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs PB CD32hi at oneWeek", xLabel= "CD20lo - CD32hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-71hi-20lo-d7_CD32_univ.pdf")

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "ActBCells...medfi.CD32.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs ABC CD32 at oneWeek", xLabel= "ABC - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "ActBCells...medfi.CD32.", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs ABC CD32 at oneWeek", xLabel= "ABC - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi.CD86....FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD86 resp at oneWeek", xLabel= "CD20lo - CD86hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi.CD86....FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs CD20lo CD86+ at oneWeek", xLabel= "CD20lo - CD86hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD19_NonnaiveB.CD27.CD38....medfi.CD32.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD32 at oneWeek", xLabel= "CD27++CD38++ PB - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "CD19_NonnaiveB.CD27.CD38....medfi.CD32.", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs CD27++CD38++ CD32 at oneWeek", xLabel= "CD27++CD38++ PB - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs-PB-27hi38hi-d7_CD32medFI_univ.pdf")


# as predicted by the elastic net regression
oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDloCD71hi..medfi.CD86.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDloCD71hi..medfi.CD86.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "TimeCategory", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"); 
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_cTfh..._medfi.Ki67.", fillParam="Cohort", title="One Week", yLabel="medFI Ki67")
## Warning: Removed 17 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"); 
twoSampleBar(data=subsetData, xData="Cohort", yData="CD20loCD71hi...medfi.Ki67.", fillParam="Cohort", title="All yrs: at oneWeek", yLabel="medFI Ki67")
## Warning: Removed 18 rows containing missing values (geom_point).

univScatter(data=subsetData, xData = "CD20loCD71hi...medfi.Ki67.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
            fillParam = "TimeCategory", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

subsetData1 <- subset(subsetData, Cohort == "Healthy")    ; subsetData2 <- subset(subsetData, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD20loCD71hi...medfi.Ki67.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

———– irAE analysis ——————–

index <- demog[which(demog$irAE == "Y" | demog$irAE == "N"),"Subject"]
mergedData.irAE <- mergedData[which(mergedData$Subject %in% index),]  
subsetData <- subset(mergedData.irAE, TimeCategory == "baseline" & Cohort == "aPD1")
subsetData$dateDiff <- round( as.numeric(difftime(subsetData$DateFirstIRAE,  subsetData$DateFirstFlowFile, units = "days")),digits = 0)
subsetData <- subsetData[-which(is.na(subsetData$dateDiff)), ]           # zero out the ones who did not have irAE
subsetData$Subject <- factor(subsetData$Subject, levels = subsetData$Subject[order(subsetData$dateDiff, decreasing = T)] )
ggplot(subsetData, aes(x=dateDiff, y=Subject)) + 
  geom_bar(stat="Identity", width=0.15, fill="#ff9a6a", size=0.01, color="black") + geom_point(aes(size=irAEgradeWorst)) + 
  xlab("Days until flu vaccine") + ylab("Subject") + labs(size = "irAE grade") + ggtitle("Time since first irAE") + scale_size(range=c(2,7),) + 
  theme_bw() + theme(axis.text.x = element_text(color = "black", size=24), axis.title = element_text(size=24), plot.title = element_text(size=32),
                     axis.text.y = element_blank(), legend.text = element_text(size=16), legend.title = element_text(size=16), 
                     ) 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_dateDiff_irAEtoVaccine.pdf")


subsetData <- mergedData[which(!is.na(mergedData$irAE) & mergedData$irAE != "" ), ]
twoSampleBar(data = subsetData, yData="FCtfh_oW", yLabel = "Fold-change at one week", title="Post-vax\nICOS+CD38+ cTfh", xData = "irAE",fillParam = "irAE", nonparam = F) + 
  theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0)) +
  scale_fill_manual(values = c("grey90", "#ff9a6a")) 
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_FCtfh.pdf", width=3.5)


subsetData %>% group_by(irAE) %>% get_summary_stats(FCtfh_oW)
## # A tibble: 2 x 14
##   irAE  variable     n   min   max median    q1    q3   iqr   mad  mean    sd
##   <chr> <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N     FCtfh_oW     7 0.924  1.86   1.24  1.10  1.43 0.326 0.342  1.30 0.323
## 2 Y     FCtfh_oW    12 0.791  6.58   1.98  1.25  2.96 1.72  1.27   2.41 1.66 
## # ... with 2 more variables: se <dbl>, ci <dbl>
twoSampleBar(data = subsetData, yData="H1N1pdm09.HAI.titer", yLabel = "Baseline H1N1pdm09 titer", title="Baseline HAI", xData = "irAE",fillParam = "irAE")+ 
  theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0), plot.title = element_text(size=28)) + 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) 
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_baselineHAI.pdf", width=4.5)
twoSampleBar(data = subsetData, yData="FChai_late", yLabel = "FC HAI titer", title="Fold-change HAI", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("#FFE6da", "#ff9a6a"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (geom_point).

twoSampleBar(data = subsetData, yData="IgDlo_CD71hi_ActBCells...FreqParent", yLabel = "Baseline ABC (% CD71)", title="ABC", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("#FFE6da", "#ff9a6a"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (geom_point).

twoSampleBar(data = subsetData, yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", yLabel = "Baseline ASC (% CD71)", title="ASC", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("#FFE6da", "#ff9a6a"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (geom_point).

twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_cTfh..._medfi.aIgG4..batchEffect", yLabel = "medianFI aIgG4", title="baseline\nICOS+CD38+ cTfh", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a"))  + theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0)) # + geom_label_repel(aes(label = Subject),size=3)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_aIgG4.pdf", width=4)
twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_cTfh..._medfi.ICOS.", yLabel = "medianFI ICOS", title="baseline\nICOS+CD38+ cTfh", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_ICOS.pdf", width=4)
twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_Ki67hi...FreqParent", yLabel = "% ICOS+CD38+ cTfh", title="baseline\nKi67", xData = "irAE",fillParam = "irAE", 
             nonparam = T)+ scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_Ki67.pdf", width=4)

twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_cTfh..._medfi.PD1.", yLabel = "medianFI PD1 - baseline", title="ICOS+CD38+ cTfh", xData = "irAE", fillParam = "irAE")+ 
  theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0), plot.title = element_text(size=28)) + 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) 
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_PD1.pdf", width=4.5)




twoSampleBar(data = subsetData, yData="IgG1_Total.sialylated", yLabel = "Sialylation (% anti-H1 IgG1)", title="Sialylation", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_sialylation-baseline.pdf", width=4)

twoSampleBar(data = subsetData, yData="IgG1sial_oW", yLabel = "Fold-change sialylation", title="Fold-change sialylation", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_sialylation-foldchange.pdf", width=4)

twoSampleBar(data = subsetData, yData="IgG1_Total.Galactosylation..G1.G2.", yLabel = "Galactosylation (% anti-H1 IgG1)", title="Galactosylation", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_galactosylation-baseline.pdf", width=4)

twoSampleBar(data = subsetData, yData="Lo.Hi.HA.affinity", yLabel = "Lo Hi Affinity", title="Affinity", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_affinity-baseline.pdf", width=4)